# Normalization functions
## Global sum normalization
# Global sum normalization.
#
# Rescales every column (sample) so that its total intensity equals the
# median of all column totals:
#   x[, j] / sum(x[, j]) * median(column sums)
#
# Args:
#   matrix: numeric matrix or data frame with one column per sample. The name
#     shadows base::matrix, but the matrix(...) call below still resolves to
#     the function because R skips non-function bindings in call position.
#   transform_data: when TRUE (default) the normalized values are returned as
#     log2(x + 1).
#
# Returns:
#   A data frame with the dimensions, column names and row names of the input.
global.norm <- function(matrix, transform_data = TRUE) {
  # Per-sample total intensity (NAs ignored) and the common target total.
  colsum <- colSums(matrix, na.rm = TRUE)
  colsum.median <- median(colsum)

  norm.matrix <- data.frame(matrix(NA, nrow = nrow(matrix), ncol = ncol(matrix)))
  # seq_len() makes the loop a no-op for zero-column input; 1:0 would iterate.
  for (col in seq_len(ncol(matrix))) {
    norm.matrix[, col] <- (matrix[, col] / colsum[col]) * colsum.median
  }
  colnames(norm.matrix) <- colnames(matrix)
  rownames(norm.matrix) <- rownames(matrix)

  if (transform_data == TRUE) {
    # The +1 offset keeps zero intensities finite (log2(1) == 0).
    norm.matrix <- log2(norm.matrix + 1)
  }
  return(norm.matrix)
}
## Median normalization
# Median normalization.
#
# Rescales every column (sample) so that its median equals the mean of all
# column medians:
#   x[, j] / median(x[, j]) * mean(column medians)
#
# Args:
#   matrix: numeric matrix or data frame with one column per sample. The name
#     shadows base::matrix, but the matrix(...) call below still resolves to
#     the function because R skips non-function bindings in call position.
#   transform_data: when TRUE (default) the normalized values are returned as
#     log2(x + 1).
#
# Returns:
#   A data frame with the dimensions, column names and row names of the input.
median.norm <- function(matrix, transform_data = TRUE) {
  # Per-sample median (NAs ignored) and the common target median.
  colmedian <- apply(matrix, 2, FUN = median, na.rm = TRUE)
  colmedian.mean <- mean(colmedian)

  norm.matrix <- data.frame(matrix(NA, nrow = nrow(matrix), ncol = ncol(matrix)))
  # seq_len() makes the loop a no-op for zero-column input; 1:0 would iterate.
  for (col in seq_len(ncol(matrix))) {
    norm.matrix[, col] <- (matrix[, col] / colmedian[col]) * colmedian.mean
  }
  colnames(norm.matrix) <- colnames(matrix)
  rownames(norm.matrix) <- rownames(matrix)

  if (transform_data == TRUE) {
    # The +1 offset keeps zero intensities finite (log2(1) == 0).
    norm.matrix <- log2(norm.matrix + 1)
  }
  return(norm.matrix)
}
## Mean normalization
# Mean normalization.
#
# Rescales every column (sample) so that its mean equals the mean of all
# column means:
#   x[, j] / mean(x[, j]) * mean(column means)
#
# Args:
#   matrix: numeric matrix or data frame with one column per sample. The name
#     shadows base::matrix, but the matrix(...) call below still resolves to
#     the function because R skips non-function bindings in call position.
#   transform_data: when TRUE (default) the normalized values are returned as
#     log2(x + 1).
#
# Returns:
#   A data frame with the dimensions, column names and row names of the input.
mean.norm <- function(matrix, transform_data = TRUE) {
  # Per-sample mean (NAs ignored) and the common target mean.
  colmean <- colMeans(matrix, na.rm = TRUE)
  colmean.mean <- mean(colmean)

  norm.matrix <- data.frame(matrix(NA, nrow = nrow(matrix), ncol = ncol(matrix)))
  # seq_len() makes the loop a no-op for zero-column input; 1:0 would iterate.
  for (col in seq_len(ncol(matrix))) {
    norm.matrix[, col] <- (matrix[, col] / colmean[col]) * colmean.mean
  }
  colnames(norm.matrix) <- colnames(matrix)
  rownames(norm.matrix) <- rownames(matrix)

  if (transform_data == TRUE) {
    # The +1 offset keeps zero intensities finite (log2(1) == 0).
    norm.matrix <- log2(norm.matrix + 1)
  }
  return(norm.matrix)
}
## VSN normalization
# VSN normalization.
#
# Runs vsn::justvsn() on the input and returns the result as a data frame.
# Messages emitted by justvsn() are suppressed.
#
# Args:
#   matrix: numeric matrix or data frame; it is coerced with as.matrix()
#     before being handed to justvsn().
#
# Returns:
#   A data frame holding the values returned by justvsn().
#   NOTE(review): per the vsn documentation these are on a glog2 scale, so
#   no extra log2 step is applied here - confirm this is the intended scale.
vsn.norm <- function(matrix) {
  norm.matrix <- suppressMessages(vsn::justvsn(as.matrix(matrix)))
  norm.matrix <- as.data.frame(norm.matrix)
  return(norm.matrix)
}
## Cyclic LOESS normalization
# Cyclic LOESS normalization.
#
# log2-transforms the input and normalizes it with
# limma::normalizeCyclicLoess(method = 'fast').
#
# Args:
#   matrix: numeric matrix or data frame with one column per sample.
#
# Returns:
#   A data frame of normalized log2 values carrying the row names of the input.
#
# NOTE(review): log2() is applied without the +1 offset used by the other
# normalizers in this file, so zero intensities become -Inf before the loess
# fit - confirm that inputs are strictly positive.
cycloess.norm <- function(matrix) {
  norm.matrix <- log2(matrix)
  norm.matrix <- limma::normalizeCyclicLoess(norm.matrix, method = 'fast')
  norm.matrix <- as.data.frame(norm.matrix)
  # Row names are re-applied from the original input explicitly.
  rownames(norm.matrix) <- rownames(matrix)
  return(norm.matrix)
}
# Function to plot all normalization methods together
# Helper function to generate boxplots
# Builds a black-and-white themed box plot with a bold, centred title.
#
# Args:
#   df: data frame in long format.
#   my_x: unquoted column of `df` mapped to the x axis.
#   my_y: unquoted column of `df` mapped to the y axis.
#
# Returns:
#   A ggplot object; callers can keep adding layers, labels and themes with `+`.
plot_boxplot <- function(df, my_x, my_y) {
  # {{ }} forwards the caller's bare column names into aes().
  ggplot(df,
         aes(x = {{my_x}},
             y = {{my_y}})) +
    geom_boxplot() +
    theme_bw() +
    theme(plot.title = element_text(face = 'bold', hjust = 0.5))
}
# -------------------------------------------------------------------------
# Compares all normalization methods side by side.
#
# Draws one box plot per sample for the raw data and for each normalizer
# (global.norm, mean.norm, median.norm, vsn.norm, cycloess.norm) and arranges
# the six panels in a 2 x 3 grid.
#
# Args:
#   df: data frame with one column per sample; the column names become the
#     'SampleID' values on the x axis.
#
# Returns:
#   The combined figure produced by ggarrange().
normalize_by_all <- function(df) {
  # Rotated sample labels and no x-axis title, shared by all six panels.
  panel_theme <- theme(axis.text.x = element_text(angle = 45, hjust = 1),
                       axis.title.x = element_blank())

  # Reshape a wide table to long form and draw its titled box plot.
  make_panel <- function(data, title, y_label = NULL) {
    long.df <- gather(data, key = 'SampleID', value = 'AUC')
    panel <- plot_boxplot(long.df, SampleID, AUC) +
      labs(title = title)
    # labs(y = NULL) would remove the axis title, so only override on request.
    if (!is.null(y_label)) {
      panel <- panel + labs(y = y_label)
    }
    panel + panel_theme
  }

  no_norm.plot <- make_panel(df, 'No normalization')
  gi_norm.plot <- make_panel(global.norm(df),
                             'Normalization by global AUC',
                             'Normalized AUC')
  mean_norm.plot <- make_panel(mean.norm(df),
                               'Normalization by mean',
                               'Normalized AUC')
  median_norm.plot <- make_panel(median.norm(df),
                                 'Normalization by median',
                                 'Normalized AUC')
  vsn_norm.plot <- make_panel(vsn.norm(df),
                              'VSN',
                              'Normalized AUC')
  cycloess_norm.plot <- make_panel(cycloess.norm(df),
                                   'LOESS normalization',
                                   'Normalized AUC')

  all_norm.plot <- ggarrange(no_norm.plot, gi_norm.plot, mean_norm.plot,
                             median_norm.plot, vsn_norm.plot, cycloess_norm.plot,
                             nrow = 2,
                             ncol = 3)
  return(all_norm.plot)
}