Evaluation of covariates

1.Dependency between p-value and covariate
2.Comparison of Omnibus test and naïve test
3.Omnibus test to assess the informativeness of EWAS-related covariates

Library R packages and basic setttings

library(tidyverse)
library(readxl)
library(cowplot)
library(IHW)
library(ggforce)

selected_measure <- c("sd.b", "sd.m", "mean", "mad", "dip", "precision", "direction", "chr",
                      "refgene.pos", "cpg.loc", "dhs", "probe.type", "icc.b", "icc.m")
measure_col <- c("sd.b"="#7570B3", "sd.m"="#E7298A", "mean"="#A6CEE3", 
                 "mad"="#1F78B4", "dip"="#B2DF8A", "precision"="#FFFF00", "icc.b"="#FB9A99", 
                 "icc.m"="#E31A1C", "refgene.pos"="#FDBF6F", "chr"="#A6761D", "cpg.loc"="#1B9E77",
                 "dhs"="#949494", "direction"="#7FFFD4", "probe.type" ="#FFE4E1")

1.Dependency between p-value and covariate

There are two representative examples for the dependency between p-value and covariate “mean”. The silimar relationship for other covariates could be found in Figure S3.

1.1 panel A : differential methylation happens in low methylation region

# load data
EWAS33 <- readRDS("Data/dataset/EWAS33.RDS")
i <- "mean"
covariate_type <- "ordinal"

# histogram of all raw p-values
p1 <- ggplot(data = EWAS33, aes(x = P.value, y = stat(density))) + 
  geom_histogram(fill = "#00AFBB", color = "black", binwidth = 0.05 , size = 0.05, boundary = 0) +
  theme_classic() +
  theme(axis.title = element_text(size = 10),
        axis.text = element_text(size = 8))

# scatter plot of mean and p-values
result <- ihw(EWAS33$P.value, covariates = EWAS33[[i]], alpha = 0.05, covariate_type = covariate_type)
result_df <- as.data.frame(result)
f <- ecdf(result_df$covariate)
result_df$percentile_covariate <- f(result_df$covariate)
p2 <- ggplot(data = result_df, aes(x = percentile_covariate, y = -log10(pvalue))) + 
  geom_hex(bins = 100) + 
  theme_classic() +
  theme(axis.title = element_text(size = 10),
        axis.text = element_text(size = 8))

# histogram stratified by covariate mean
result_df$ihwGroup <- groups_by_filter(result_df$group, 5)
p3 <- ggplot(data = result_df, aes(x = pvalue, y = stat(density))) + 
  geom_histogram(fill = "#00AFBB", color = "black", binwidth = 0.05, boundary = 0, size = 0.05) +
  facet_wrap(~ ihwGroup, nrow = 1) + 
  theme_classic()  +
  theme(axis.title = element_text(size = 10),
        axis.text = element_text(size = 8))

# put figures together
plots <- align_plots(p1, p3, align = "v", axis = "l")
top_row <- plot_grid(plots[[1]], p2, nrow = 1, align = "h")
panelA <- plot_grid(top_row, plots[[2]], ncol = 1)
panelA

1.2 panel B: differential methylation happens in median methylation region

# load data
EWAS14 <- readRDS("Data/dataset/EWAS14.RDS")
i <- "mean"
covariate_type <- "ordinal"

# histogram of all raw p-values
p1 <- ggplot(data = EWAS14, aes(x = P.value, y = stat(density))) + 
  geom_histogram(fill = "#00AFBB", color = "black", binwidth = 0.05 , size = 0.05, boundary = 0) +
  theme_classic()  +
  theme(axis.title = element_text(size = 10),
        axis.text = element_text(size = 8))

# scatter plot of mean and p-values
result <- ihw(EWAS14$P.value, covariates = EWAS14[[i]], alpha = 0.05, covariate_type = covariate_type)
result_df <- as.data.frame(result)
f <- ecdf(result_df$covariate)
result_df$percentile_covariate <- f(result_df$covariate)
p2 <- ggplot(data = result_df, aes(x = percentile_covariate, y = -log10(pvalue))) + 
  geom_hex(bins = 100) + 
  theme_classic()  +
  theme(axis.title = element_text(size = 10),
        axis.text = element_text(size = 8))

# histogram stratified by covariate mean
result_df$ihwGroup <- groups_by_filter(result_df$group, 5)
p3 <- ggplot(data = result_df, aes(x = pvalue, y = stat(density))) + 
  geom_histogram(fill = "#00AFBB", color = "black", binwidth = 0.05, boundary = 0, size = 0.05) +
  facet_wrap(~ ihwGroup, nrow = 1) + 
  theme_classic() +
  theme(axis.title = element_text(size = 10),
        axis.text = element_text(size = 8))

# put figures together
plots <- align_plots(p1, p3, align = "v", axis = "l")
top_row <- plot_grid(plots[[1]], p2, nrow = 1, align = "h")
panelB <- plot_grid(top_row, plots[[2]], ncol = 1)
panelB

1.3 put all panels together

fs3 <- plot_grid(panelA, panelB, ncol = 1, align = "vl", labels = c("A", "B"), label_size = 18)
fs3

2.Comparison of Omnibus test and naïve test

We performed simulations to assess the type I error and power of the proposed omnibus test and benchmarked against the naïve tests.

3.Omnibus test to assess the informativeness of EWAS-related covariates

We use omnibus test to evaluate the 14 EWAS-related covariates’ informativeness for each EWAS data set. Covariates with p-value less than 0.05 are thought to be informative.

omnibus_result <- read.csv("Data/omnibus.result.csv", header = TRUE)
p <- omnibus_result %>%
  filter(measure %in% selected_measure) %>%
  ggplot(aes(x = reorder(measure, -log10(p.value), FUN = median), y = -log10(p.value))) +
  geom_boxplot(aes(fill = measure), outlier.shape = NA, fatten = 1) + 
  geom_sina(size = 1, alpha = 0.3, shape = 16) +
  geom_hline(yintercept = -log10(0.05), linetype = "dashed") +
  coord_flip() +
  xlab("") +
  scale_fill_manual(values = measure_col) +
  theme_classic() +
  theme(legend.position = "none", 
        axis.text = element_text(size = 8),
        axis.title = element_text(size = 10))
p