Contents


Load R packages and basic settings

library(tidyverse)
library(readxl)
library(ggpubr)
options(stringsAsFactors = FALSE)

1. Tissue distribution of EWAS data sets


The tissue sources and sample sizes of the 61 EWAS data sets are displayed in the following plot; the majority of the data sets are from blood.

# Read the data set table and order it for plotting:
#   1. sort by Size (largest first) so that the Tissue levels follow the
#      order in which tissues first appear in that sorted table;
#   2. re-sort by Tissue (now in factor-level order), then by Size within
#      each tissue;
#   3. freeze that row order into the New_ID levels so the x axis of the
#      bar plot follows it.
sampleinfo <- read.csv("Data/DatasetInfo.csv", header = TRUE) %>%
  arrange(desc(Size)) %>%
  mutate(Tissue = factor(Tissue, levels = unique(Tissue))) %>%
  arrange(Tissue, desc(Size)) %>%
  mutate(New_ID = factor(New_ID, levels = unique(New_ID)))

# Manual fill colours, assigned to the Tissue levels in order.
my_palette <- c(
  "#1B9E77", "#D95F02", "#7570B3", "#E7298A", "#E6AB02",
  "#A6761D", "#A6CEE3", "#1F78B4", "#B2DF8A", "#FFFF00",
  "#FB9A99", "#E31A1C", "#949494", "#FDBF6F", "#FF7F00",
  "#CAB2D6", "#2F4F4F", "#7FFFD4", "#FFE4E1", "#00E5EE"
)

# Bar plot: one bar per data set (New_ID), bar height = Size,
# bar colour = Tissue.
fs1 <- ggplot(data = sampleinfo, aes(x = New_ID, y = Size, group = Tissue)) +
  geom_bar(aes(fill = Tissue), stat = "identity", width = 0.5) +
  scale_fill_manual(values = my_palette) +
  theme_classic() +
  theme(
    title = element_text(size = 10),
    axis.title.x = element_blank(),
    axis.text.x = element_text(angle = 75, size = 5, hjust = 0, vjust = 0.1),
    axis.text.y = element_text(size = 8),
    legend.position = "top",
    legend.key.size = unit(0.3, "cm"),
    legend.text = element_text(size = 8)
  )
fs1

2. QQ-plot of p-values


The genomic inflation of the association p-values before and after surrogate variable analysis is shown as QQ-plots. Because the file qqplot.RData is too large, it is not uploaded here; the source code for the QQ-plots is provided in the code folder.

# Restore the pre-computed QQ-plot objects; the code below uses the list
# `myplot` that this file is expected to provide.
load("Data/qqplot.RData")
if (!exists("myplot")) {
  stop("Object 'myplot' was not found after loading Data/qqplot.RData",
       call. = FALSE)
}

# Give every panel its list name as a centred title and shrink the axis
# fonts so that 16 panels fit on one page. seq_along() (rather than
# 1:length()) keeps this safe when `myplot` is empty, and lapply() builds
# the result list in one step instead of growing it inside a loop.
myplots <- lapply(seq_along(myplot), function(i) {
  myplot[[i]] +
    ggtitle(names(myplot)[i]) +
    theme(
      axis.title = element_text(size = 6),
      axis.text = element_text(size = 6),
      plot.title = element_text(hjust = 0.5, size = 8)
    )
})

# Arrange the panels on 4 x 4 pages that share one legend.
plots <- ggarrange(plotlist = myplots, ncol = 4, nrow = 4, common.legend = TRUE)
# show the example of 16 datasets
# (the element named "1", i.e. the first page; `[[` matches the name exactly)
plots[["1"]]