library(tidyverse)
library(readxl)
library(cowplot)
library(IHW)
library(ggforce)
selected_measure <- c("sd.b", "sd.m", "mean", "mad", "dip", "precision", "direction", "chr",
"refgene.pos", "cpg.loc", "dhs", "probe.type", "icc.b", "icc.m")
measure_col <- c("sd.b"="#7570B3", "sd.m"="#E7298A", "mean"="#A6CEE3",
"mad"="#1F78B4", "dip"="#B2DF8A", "precision"="#FFFF00", "icc.b"="#FB9A99",
"icc.m"="#E31A1C", "refgene.pos"="#FDBF6F", "chr"="#A6761D", "cpg.loc"="#1B9E77",
"dhs"="#949494", "direction"="#7FFFD4", "probe.type" ="#FFE4E1")
There are two representative examples for the dependency between p-value and covariate “mean”. The silimar relationship for other covariates could be found in Figure S3.
# load data
EWAS33 <- readRDS("Data/dataset/EWAS33.RDS")
i <- "mean"
covariate_type <- "ordinal"
# histogram of all raw p-values
p1 <- ggplot(data = EWAS33, aes(x = P.value, y = stat(density))) +
geom_histogram(fill = "#00AFBB", color = "black", binwidth = 0.05 , size = 0.05, boundary = 0) +
theme_classic() +
theme(axis.title = element_text(size = 10),
axis.text = element_text(size = 8))
# scatter plot of mean and p-values
result <- ihw(EWAS33$P.value, covariates = EWAS33[[i]], alpha = 0.05, covariate_type = covariate_type)
result_df <- as.data.frame(result)
f <- ecdf(result_df$covariate)
result_df$percentile_covariate <- f(result_df$covariate)
p2 <- ggplot(data = result_df, aes(x = percentile_covariate, y = -log10(pvalue))) +
geom_hex(bins = 100) +
theme_classic() +
theme(axis.title = element_text(size = 10),
axis.text = element_text(size = 8))
# histogram stratified by covariate mean
result_df$ihwGroup <- groups_by_filter(result_df$group, 5)
p3 <- ggplot(data = result_df, aes(x = pvalue, y = stat(density))) +
geom_histogram(fill = "#00AFBB", color = "black", binwidth = 0.05, boundary = 0, size = 0.05) +
facet_wrap(~ ihwGroup, nrow = 1) +
theme_classic() +
theme(axis.title = element_text(size = 10),
axis.text = element_text(size = 8))
# put figures together
plots <- align_plots(p1, p3, align = "v", axis = "l")
top_row <- plot_grid(plots[[1]], p2, nrow = 1, align = "h")
panelA <- plot_grid(top_row, plots[[2]], ncol = 1)
panelA
# load data
EWAS14 <- readRDS("Data/dataset/EWAS14.RDS")
i <- "mean"
covariate_type <- "ordinal"
# histogram of all raw p-values
p1 <- ggplot(data = EWAS14, aes(x = P.value, y = stat(density))) +
geom_histogram(fill = "#00AFBB", color = "black", binwidth = 0.05 , size = 0.05, boundary = 0) +
theme_classic() +
theme(axis.title = element_text(size = 10),
axis.text = element_text(size = 8))
# scatter plot of mean and p-values
result <- ihw(EWAS14$P.value, covariates = EWAS14[[i]], alpha = 0.05, covariate_type = covariate_type)
result_df <- as.data.frame(result)
f <- ecdf(result_df$covariate)
result_df$percentile_covariate <- f(result_df$covariate)
p2 <- ggplot(data = result_df, aes(x = percentile_covariate, y = -log10(pvalue))) +
geom_hex(bins = 100) +
theme_classic() +
theme(axis.title = element_text(size = 10),
axis.text = element_text(size = 8))
# histogram stratified by covariate mean
result_df$ihwGroup <- groups_by_filter(result_df$group, 5)
p3 <- ggplot(data = result_df, aes(x = pvalue, y = stat(density))) +
geom_histogram(fill = "#00AFBB", color = "black", binwidth = 0.05, boundary = 0, size = 0.05) +
facet_wrap(~ ihwGroup, nrow = 1) +
theme_classic() +
theme(axis.title = element_text(size = 10),
axis.text = element_text(size = 8))
# put figures together
plots <- align_plots(p1, p3, align = "v", axis = "l")
top_row <- plot_grid(plots[[1]], p2, nrow = 1, align = "h")
panelB <- plot_grid(top_row, plots[[2]], ncol = 1)
panelB
fs3 <- plot_grid(panelA, panelB, ncol = 1, align = "vl", labels = c("A", "B"), label_size = 18)
fs3
We performed simulations to assess the type I error and power of the proposed omnibus test and benchmarked against the naïve tests.
We use omnibus test to evaluate the 14 EWAS-related covariates’ informativeness for each EWAS data set. Covariates with p-value less than 0.05 are thought to be informative.
omnibus_result <- read.csv("Data/omnibus.result.csv", header = TRUE)
p <- omnibus_result %>%
filter(measure %in% selected_measure) %>%
ggplot(aes(x = reorder(measure, -log10(p.value), FUN = median), y = -log10(p.value))) +
geom_boxplot(aes(fill = measure), outlier.shape = NA, fatten = 1) +
geom_sina(size = 1, alpha = 0.3, shape = 16) +
geom_hline(yintercept = -log10(0.05), linetype = "dashed") +
coord_flip() +
xlab("") +
scale_fill_manual(values = measure_col) +
theme_classic() +
theme(legend.position = "none",
axis.text = element_text(size = 8),
axis.title = element_text(size = 10))
p