# Normalization functions
# Global sum normalization
global.norm <- function(matrix, transform_data = TRUE) {
  # Normalize each sample (column) by its total intensity (global AUC) and
  # rescale to the median of those totals across samples.
  #
  # Args:
  #   matrix: numeric matrix or data frame; columns are treated as samples.
  #   transform_data: when TRUE (default), return log2(normalized + 1).
  #
  # Returns:
  #   A data frame with the same dimensions, column names and row names as
  #   `matrix`.
  sample_totals <- colSums(matrix, na.rm = TRUE)
  reference_total <- median(sample_totals)

  # `matrix(...)` still resolves to base::matrix here: when evaluating a call,
  # R skips non-function bindings such as the `matrix` argument.
  norm.matrix <- data.frame(
    matrix(NA, nrow = nrow(matrix), ncol = ncol(matrix))
  )
  # seq_len() keeps the loop from running on a zero-column input, where
  # 1:ncol(matrix) would yield c(1, 0) and index out of bounds.
  for (col in seq_len(ncol(matrix))) {
    norm.matrix[, col] <- (matrix[, col] / sample_totals[col]) * reference_total
  }
  colnames(norm.matrix) <- colnames(matrix)
  rownames(norm.matrix) <- rownames(matrix)

  if (transform_data == TRUE) {
    norm.matrix <- log2(norm.matrix + 1)
  }
  return(norm.matrix)
}
# Median normalization
median.norm <- function(matrix, transform_data = TRUE) {
  # Normalize each sample (column) by its median intensity and rescale to the
  # mean of those medians across samples.
  #
  # Args:
  #   matrix: numeric matrix or data frame; columns are treated as samples.
  #   transform_data: when TRUE (default), return log2(normalized + 1).
  #
  # Returns:
  #   A data frame with the same dimensions, column names and row names as
  #   `matrix`.
  #
  # NOTE(review): the name `median.norm` also matches the S3 method pattern
  # of median() for a class "norm"; harmless unless such a class is in use.
  sample_medians <- apply(matrix, 2, FUN = median, na.rm = TRUE)
  # na.rm = TRUE: an all-NA sample has an NA median; without it that single
  # sample would turn the reference, and so every normalized value, into NA.
  reference_median <- mean(sample_medians, na.rm = TRUE)

  norm.matrix <- data.frame(
    matrix(NA, nrow = nrow(matrix), ncol = ncol(matrix))
  )
  # seq_len() avoids the c(1, 0) sequence that 1:ncol() gives for zero columns.
  for (col in seq_len(ncol(matrix))) {
    norm.matrix[, col] <-
      (matrix[, col] / sample_medians[col]) * reference_median
  }
  colnames(norm.matrix) <- colnames(matrix)
  rownames(norm.matrix) <- rownames(matrix)

  if (transform_data == TRUE) {
    norm.matrix <- log2(norm.matrix + 1)
  }
  return(norm.matrix)
}
# Mean normalization
mean.norm <- function(matrix, transform_data = TRUE) {
  # Normalize each sample (column) by its mean intensity and rescale to the
  # mean of those per-sample means.
  #
  # Args:
  #   matrix: numeric matrix or data frame; columns are treated as samples.
  #   transform_data: when TRUE (default), return log2(normalized + 1).
  #
  # Returns:
  #   A data frame with the same dimensions, column names and row names as
  #   `matrix`.
  #
  # NOTE(review): the name `mean.norm` also matches the S3 method pattern of
  # mean() for a class "norm"; harmless unless such a class is in use.
  sample_means <- colMeans(matrix, na.rm = TRUE)
  # na.rm = TRUE: colMeans() yields NaN for an all-NA sample; without it that
  # single sample would turn the reference, and so every value, into NaN.
  reference_mean <- mean(sample_means, na.rm = TRUE)

  norm.matrix <- data.frame(
    matrix(NA, nrow = nrow(matrix), ncol = ncol(matrix))
  )
  # seq_len() avoids the c(1, 0) sequence that 1:ncol() gives for zero columns.
  for (col in seq_len(ncol(matrix))) {
    norm.matrix[, col] <- (matrix[, col] / sample_means[col]) * reference_mean
  }
  colnames(norm.matrix) <- colnames(matrix)
  rownames(norm.matrix) <- rownames(matrix)

  if (transform_data == TRUE) {
    norm.matrix <- log2(norm.matrix + 1)
  }
  return(norm.matrix)
}
# VSN normalization
vsn.norm <- function(matrix) {
  # Apply variance-stabilizing normalization through vsn::justvsn().
  #
  # Args:
  #   matrix: numeric matrix or data frame; coerced with as.matrix() first.
  #
  # Returns:
  #   The justvsn() result converted to a data frame.
  input_values <- as.matrix(matrix)
  # Messages emitted by justvsn() are deliberately silenced.
  fitted_values <- suppressMessages(vsn::justvsn(input_values))
  norm.matrix <- as.data.frame(fitted_values)
  return(norm.matrix)
}
# Cyclic LOESS normalization
cycloess.norm <- function(matrix) {
  # Normalize samples with cyclic loess on the log2 scale, using
  # limma::normalizeCyclicLoess() with method = 'fast'.
  #
  # Args:
  #   matrix: numeric matrix or data frame of intensities.
  #
  # Returns:
  #   A data frame of the normalized log2 values carrying the row names of
  #   `matrix`.
  #
  # NOTE(review): log2() is applied without the +1 offset used by the other
  # normalizers in this file, so zero intensities become -Inf before the
  # loess fit -- confirm inputs are strictly positive.
  log_values <- log2(matrix)
  loess_values <- limma::normalizeCyclicLoess(log_values, method = 'fast')
  norm.matrix <- as.data.frame(loess_values)
  rownames(norm.matrix) <- rownames(matrix)
  return(norm.matrix)
}
# Function to plot all normalization methods together
# Helper: draw a black-and-white themed boxplot of `my_y` grouped by `my_x`.
plot_boxplot <- function(df, my_x, my_y) {
  # `my_x` and `my_y` are given as bare column names of `df` and forwarded to
  # aes() with the embrace operator.
  axis_mapping <- aes(x = {{ my_x }}, y = {{ my_y }})
  # Bold, horizontally centered plot title.
  centered_bold_title <- theme(
    plot.title = element_text(face = 'bold', hjust = 0.5)
  )
  base_plot <- ggplot(df, axis_mapping)
  base_plot + geom_boxplot() + theme_bw() + centered_bold_title
}
# -------------------------------------------------------------------------
normalize_by_all <- function(df) {
  # Build a 2 x 3 grid of per-sample boxplots comparing the raw data with the
  # global-AUC, mean, median, VSN and cyclic-loess normalizations.
  #
  # Args:
  #   df: data frame of intensities with one column per sample; the column
  #     names become the SampleID values on the x axis.
  #
  # Returns:
  #   The arranged plot object returned by ggarrange().

  # Reshape `data` to long format (SampleID / AUC) and draw one titled boxplot
  # panel. `y_label` is only handed to labs() when supplied, because an
  # explicit `y = NULL` would remove the axis label instead of keeping the
  # default one.
  boxplot_panel <- function(data, title, y_label = NULL) {
    long.df <- gather(data, key = 'SampleID', value = 'AUC')
    panel_labels <- if (is.null(y_label)) {
      labs(title = title)
    } else {
      labs(title = title, y = y_label)
    }
    plot_boxplot(long.df, SampleID, AUC) +
      panel_labels +
      theme(axis.text.x = element_text(angle = 45, hjust = 1),
            axis.title.x = element_blank())
  }

  # Panels are built one after another in the original order, so an error in
  # any normalizer surfaces at the same point as before.
  no_norm.plot <- boxplot_panel(df, 'No normalization')
  gi_norm.plot <- boxplot_panel(global.norm(df),
                                'Normalization by global AUC',
                                'Normalized AUC')
  mean_norm.plot <- boxplot_panel(mean.norm(df),
                                  'Normalization by mean',
                                  'Normalized AUC')
  median_norm.plot <- boxplot_panel(median.norm(df),
                                    'Normalization by median',
                                    'Normalized AUC')
  vsn_norm.plot <- boxplot_panel(vsn.norm(df),
                                 'VSN',
                                 'Normalized AUC')
  cycloess_norm.plot <- boxplot_panel(cycloess.norm(df),
                                      'LOESS normalization',
                                      'Normalized AUC')

  all_norm.plot <- ggarrange(no_norm.plot, gi_norm.plot, mean_norm.plot,
                             median_norm.plot, vsn_norm.plot,
                             cycloess_norm.plot,
                             nrow = 2,
                             ncol = 3)
  return(all_norm.plot)
}