Contents
Library R packages and basic setttings
library(tidyverse)
library(circlize)
library(ComplexHeatmap)
library(UpSetR)
library(ggplotify)
library(missMethyl)
library(IlluminaHumanMethylation450kanno.ilmn12.hg19)
library(limma)
library(DT)
library(cowplot)
library(IHW)
selected_measure <- c("mean", "sd.b", "mad", "precision", "dhs", "direction",
"chr", "refgene.pos", "dip", "probe.type", "cpg.loc", "sd.m")
1. Stratified histograms by covariate “p-value”
# stratified histogram of EWAS29's p-value by z-score transfromed p-value from EWAS28
EWAS28 <- readRDS("Data/dataset/EWAS28.RDS")
EWAS29 <- readRDS("Data/dataset/EWAS29.RDS")
overlap <- intersect(rownames(EWAS28), rownames(EWAS29))
EWAS28_pvalue <- EWAS28[overlap, "P.value"]
EWAS29_pvalue <- EWAS29[overlap, "P.value"]
## z-score tranformation for EWAS28's p-value
ztransform <- function(p.value, tol = 1E-15) {
# Transform p-values to z-scores
p.value[p.value <= tol] <- tol
p.value[p.value >= 1 - tol] <- 1 - tol
z <- -qnorm(p.value)
return(z)
}
EWAS28_pvalue_zscore <- ztransform(EWAS28_pvalue)
## histogram of all raw p-values
p1 <- ggplot(data = EWAS29[overlap, ], aes(x = P.value, y = stat(density))) +
geom_histogram(fill = "#00AFBB", color = "black", binwidth = 0.05, size = 0.05, boundary = 0) +
scale_y_continuous(breaks = c(0, 0.4, 0.8, 1.2)) +
theme_classic() +
theme(axis.title = element_text(size = 10),
axis.text = element_text(size = 8))
## histogram stratified by covariate p-value
ihw.obj <- ihw(EWAS29_pvalue, EWAS28_pvalue_zscore, alpha = 0.05, covariate_type = "ordinal")
result_df <- as.data.frame(ihw.obj)
result_df$ihwGroup <- groups_by_filter(result_df$group, 4)
p2 <- ggplot(data = result_df, aes(x = pvalue, y = stat(density))) +
geom_histogram(fill = "#00AFBB", color = "black", binwidth = 0.05, boundary = 0, size = 0.05) +
facet_wrap(~ ihwGroup, nrow = 1, scales = "fixed") +
scale_y_continuous(breaks = c(0, 0.4, 0.8, 1.2)) +
theme_classic() +
theme(axis.title = element_text(size = 10),
axis.text = element_text(size = 8))
## put figures together
fs18 <- plot_grid(p1, p2, nrow = 1, labels = "AUTO", rel_widths = c(1, 3))
fs18

2. Significant signals change across methods and covariates
# read in adjusted p-values to a list
pvalues <- list()
for(s in c("AdaPT", "BL", "CAMT", "FDRreg", "IHW")){
pvalues[[s]] <- readRDS(paste0("Data/pvalue_as_covariate_", s, ".RDS"))
}
# calculate the significant DMPs' number
discovery <- as.data.frame(lapply(pvalues, function(x)(apply(x, 2, function(x)sum(x < 0.05)))))
discovery <- as.data.frame(t(discovery))
# extract BH and ST signal numbers
BH <- unique(discovery$BH)
ST <- unique(discovery$ST)
plot_data <- discovery %>%
select(c(selected_measure, "pvalue"))
plot_data <- as.matrix((t(plot_data) - ST)/ST)*100
cell_lable <- paste0(round(plot_data, 0), "%")
cell_lable <- matrix(cell_lable, nrow = nrow(plot_data), ncol = ncol(plot_data))
col_fun = colorRamp2(c(-200, 0, 400), c("royalblue", "white", "red"), space = "RGB")
panel_A <- Heatmap(plot_data,
col = col_fun,
rect_gp = gpar(col = "grey", lwd = 0.25),
cell_fun = function(j, i, x, y, width, height, fill){
if(abs(plot_data[i, j]) > 1) grid.text(cell_lable[i, j], x, y, gp = gpar(fontsize = 6))
},
clustering_distance_rows = "euclidean",
clustering_distance_columns = "euclidean",
clustering_method_rows = "ward.D",
clustering_method_columns = "ward.D",
show_row_dend = FALSE,
show_column_dend = FALSE,
heatmap_legend_param = list(title = "Color Key",
labels_gp = gpar(fontsize = 5),
labels = c("-100%", 0, "200%", "400%"),
direction = "horizontal",
title_position = "lefttop",
title_gp = gpar(fontsize = 8),
grid_height = unit(2, "mm"),
grid_width = unit(4, "mm")),
row_names_gp = gpar(fontsize = 8),
column_names_gp = gpar(fontsize = 6),
column_names_side ="top")
draw(panel_A, heatmap_legend_side = "bottom")

panel_A <- grid.grabExpr(draw(panel_A, heatmap_legend_side = "bottom"))
2. The performance of covariate p-value by method IHW
2.1 The intersections
raw_probes <- rownames(pvalues[[1]])
sig_probes <- lapply(pvalues, function(x)apply(x, 2, function(x)raw_probes[x < 0.05]))
selected_method <- "IHW"
upset_list <- sig_probes[[selected_method]]
# upset
selected_measure <- c(selected_measure, "pvalue", "ST")
panel_B <- upset(fromList(upset_list),
sets = selected_measure,
order.by = "freq",
point.size = 1.5,
line.size = 0.7,
mb.ratio = c(0, 1))
panel_B <- as.ggplot(panel_B)
panel_B

2.2 GO and KEGG enrichment analysis
## look at the results obtained by covariate pvalue with method IHW
sigCpGs <- upset_list[["pvalue"]]
gst <- gometh(sig.cpg=sigCpGs, all.cpg=raw_probes, collection = "GO", prior.prob = TRUE, array.type = "450K", plot.bias = FALSE)
# Top 5 GO categories
top10_GO <- topGO(gst, number=10)
datatable(top10_GO, rownames = TRUE, filter= "none", options = list(pageLength = 5, scrollX=T))
top10_GO$ratio <- top10_GO$DE / top10_GO$N
# plot top 10 terms
top10_GO$Term <- factor(top10_GO$Term, levels = as.character(top10_GO$Term[10:1]))
panel_C <- ggplot(data = top10_GO, mapping = aes(x = Term, y = ratio)) +
geom_col(aes(fill = FDR), width = 0.5) +
coord_flip() +
theme_bw() +
ylab("Ratio") +
theme(legend.position = "none",
axis.title.y = element_blank(),
axis.text = element_text(size = 6))
panel_C

# Top 10 KEGG categories
kegg <- gometh(sig.cpg = sigCpGs, all.cpg = raw_probes, collection = "KEGG", prior.prob=TRUE)
datatable(topKEGG(kegg, number = 10), rownames = TRUE, filter="none", options = list(pageLength = 5, scrollX=T))
# organize figures
top_row <- plot_grid(panel_A, panel_C, rel_widths = c(1, 1),
labels = c("A", "C"), label_size = 18)
f6 <- plot_grid(top_row, panel_B, ncol = 1, labels = c("", "B"), label_size = 18)
3. GO and KEGG analysis for ST results
reference <- upset_list[["ST"]]
# Top 20 GO categories
gst <- gometh(sig.cpg=reference, all.cpg=raw_probes, collection = "GO", prior.prob = TRUE, array.type = "450K", plot.bias = FALSE)
datatable(topGO(gst, number=20), rownames = TRUE, filter="none", options = list(pageLength = 5, scrollX=T))
# Top 10 KEGG categories
kegg <- gometh(sig.cpg = reference, all.cpg = raw_probes, collection = "KEGG", prior.prob=TRUE)
datatable(topKEGG(kegg, number = 10), rownames = TRUE, filter="none", options = list(pageLength = 5, scrollX=T))
4. Validate the IHW results by method CAMT
4.1 The intersections
selected_method <- "CAMT"
upset_list <- sig_probes[[selected_method]]
# convert list to data frame
selected <- unique(unlist(upset_list))
upset_df <- as.data.frame(matrix(nrow = length(selected), ncol = length(upset_list),
dimnames = list(selected, names(upset_list))))
for(i in 1:nrow(upset_df)){
for(j in 1:ncol(upset_df)){
if(rownames(upset_df)[i] %in% upset_list[[colnames(upset_df)[j]]]){
upset_df[i, j] <- 1
}else{
upset_df[i, j] <- 0
}
}
}
# add annotations for EWAS28 significant probes by method BH
EWAS28_sigprobes <- readRDS("Data/EWAS28_sigprobes.RDs")
upset_df$EWAS28 <- ifelse(rownames(upset_df) %in% EWAS28_sigprobes, "yes", "no")
# upset
fs19_1 <- upset(upset_df,
sets = selected_measure,
order.by = "freq",
queries = list(list(query = elements, params = list("EWAS28", "yes"), color = "red", active = TRUE)),
mb.ratio = c(0.6, 0.4),
point.size = 1.5,
line.size = 0.5)
fs19_1 <- as.ggplot(fs19_1)
fs19_1

4.2 GO and KEGG enrichment analysis
sigCpGs <- upset_list[["pvalue"]]
gst <- gometh(sig.cpg=sigCpGs, all.cpg=raw_probes, collection = "GO", prior.prob = TRUE, array.type = "450K", plot.bias = FALSE)
# Top 10 GO categories
top10_GO <- topGO(gst, number=10)
datatable(top10_GO, rownames = TRUE, filter="none", options = list(pageLength = 5, scrollX=T))
top10_GO$ratio <- top10_GO$DE / top10_GO$N
top10_GO$Term <- factor(top10_GO$Term, levels = as.character(top10_GO$Term[10:1]))
options(digits = 2)
fs19_2 <- ggplot(data = top10_GO, mapping = aes(x = Term, y = ratio)) +
geom_col(aes(fill = FDR), width = 0.5) +
coord_flip() +
theme_classic() +
theme(axis.title.y = element_blank(),
legend.title = element_text(size = 7),
legend.text = element_text(size = 5),
legend.key.size = unit(4, "mm"))
fs19_2

# Top 10 KEGG categories
kegg <- gometh(sig.cpg = sigCpGs, all.cpg = raw_probes, collection = "KEGG", prior.prob=TRUE)
datatable(topKEGG(kegg, number = 10), rownames = TRUE, filter="none", options = list(pageLength = 5, scrollX=T))
# organize figures
fs19 <- plot_grid(fs19_1, fs19_2, ncol = 1, labels = c("A", "B"), label_size = 18,
rel_heights = c(2, 1))