Apply the hypergeometric distribution to evaluate the significance of conserved tDMRs.
library("gplots")
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
library("ggplot2")
## Warning: package 'ggplot2' was built under R version 3.4.4
library("RColorBrewer")
library("scales")
## Warning: package 'scales' was built under R version 3.4.4
library("edgeR")
## Loading required package: limma
library("R.utils")
## Warning: package 'R.utils' was built under R version 3.4.4
## Loading required package: R.oo
## Warning: package 'R.oo' was built under R version 3.4.4
## Loading required package: R.methodsS3
## R.methodsS3 v1.7.1 (2016-02-15) successfully loaded. See ?R.methodsS3 for help.
## R.oo v1.22.0 (2018-04-21) successfully loaded. See ?R.oo for help.
##
## Attaching package: 'R.oo'
## The following objects are masked from 'package:methods':
##
## getClasses, getMethods
## The following objects are masked from 'package:base':
##
## attach, detach, gc, load, save
## R.utils v2.7.0 successfully loaded. See ?R.utils for help.
##
## Attaching package: 'R.utils'
## The following object is masked from 'package:utils':
##
## timestamp
## The following objects are masked from 'package:base':
##
## cat, commandArgs, getOption, inherits, isOpen, parse, warnings
library("plyr")
library("limma")
library("gridExtra")
library("VennDiagram")
## Warning: package 'VennDiagram' was built under R version 3.4.4
## Loading required package: grid
## Loading required package: futile.logger
source("functions.R")
library(ashr)
library(cowplot)
## Warning: package 'cowplot' was built under R version 3.4.4
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggplot2':
##
## ggsave
tdmr_pairwise_overlap <- read.csv("../data/tDMR_overlap_data.csv")
# Larger population- chimpanzees
# Add columns for results
tdmr_pairwise_overlap[,13] <- array(NA, dim = c(nrow(tdmr_pairwise_overlap),1))
tdmr_pairwise_overlap[,14] <- array(NA, dim = c(nrow(tdmr_pairwise_overlap),1))
for (i in 1:nrow(tdmr_pairwise_overlap)){
m <- tdmr_pairwise_overlap[i,3] + tdmr_pairwise_overlap[i,5] + tdmr_pairwise_overlap[i,6] + tdmr_pairwise_overlap[i,7]
n <- tdmr_pairwise_overlap[i,11] - m
x <- tdmr_pairwise_overlap[i,3] + tdmr_pairwise_overlap[i,5]
q = x
k = tdmr_pairwise_overlap[i,3] + tdmr_pairwise_overlap[i,4]+tdmr_pairwise_overlap[i,5]+tdmr_pairwise_overlap[i,9]
# Expected
tdmr_pairwise_overlap[i,13] <- which(grepl(max(dhyper(1:x, m, n, k)), dhyper(1:x, m, n, k)))
# P value of observed
tdmr_pairwise_overlap[i,14] <- phyper(q, m, n, k, lower.tail = FALSE)
}
# When the larger population is the humans
# Add columns for results
tdmr_pairwise_overlap[,15] <- array(NA, dim = c(nrow(tdmr_pairwise_overlap),1))
tdmr_pairwise_overlap[,16] <- array(NA, dim = c(nrow(tdmr_pairwise_overlap),1))
for (i in 1:nrow(tdmr_pairwise_overlap)){
m <- tdmr_pairwise_overlap[i,3] + tdmr_pairwise_overlap[i,4]+tdmr_pairwise_overlap[i,5]+tdmr_pairwise_overlap[i,9]
n <- tdmr_pairwise_overlap[i,12] - m
x <- tdmr_pairwise_overlap[i,3] + tdmr_pairwise_overlap[i,5]
q = x
k = tdmr_pairwise_overlap[i,3] + tdmr_pairwise_overlap[i,5] + tdmr_pairwise_overlap[i,6] + tdmr_pairwise_overlap[i,7]
# Expected
tdmr_pairwise_overlap[i,15] <- which(grepl(max(dhyper(1:x, m, n, k)), dhyper(1:x, m, n, k)))
# P value of observed
tdmr_pairwise_overlap[i,16] <- phyper(q, m, n, k, lower.tail = FALSE)
}
# Chimpanzees as larger
# Add columns for results
tdmr_pairwise_overlap[,17] <- array(NA, dim = c(nrow(tdmr_pairwise_overlap),1))
tdmr_pairwise_overlap[,18] <- array(NA, dim = c(nrow(tdmr_pairwise_overlap),1))
for (i in 1:nrow(tdmr_pairwise_overlap)){
m <- tdmr_pairwise_overlap[i,10]
n <- tdmr_pairwise_overlap[i,11] - m
x <- tdmr_pairwise_overlap[i,3]
q = x
k = tdmr_pairwise_overlap[i,3] + tdmr_pairwise_overlap[i,7] + tdmr_pairwise_overlap[i,8] + tdmr_pairwise_overlap[i,9]
# Expected
tdmr_pairwise_overlap[i,17] <- which(grepl(max(dhyper(1:x, m, n, k)), dhyper(1:x, m, n, k)))
# P value of observed
tdmr_pairwise_overlap[i,18] <- phyper(q, m, n, k, lower.tail = FALSE)
}
# Humans as larger
# Add columns for results
tdmr_pairwise_overlap[,19] <- array(NA, dim = c(nrow(tdmr_pairwise_overlap),1))
tdmr_pairwise_overlap[,20] <- array(NA, dim = c(nrow(tdmr_pairwise_overlap),1))
for (i in 1:nrow(tdmr_pairwise_overlap)){
m <- tdmr_pairwise_overlap[i,10]
n <- tdmr_pairwise_overlap[i,12] - m
x <- tdmr_pairwise_overlap[i,3]
q = x
k = tdmr_pairwise_overlap[i,3] + tdmr_pairwise_overlap[i,7] + tdmr_pairwise_overlap[i,8] + tdmr_pairwise_overlap[i,9]
# Expected
tdmr_pairwise_overlap[i,19] <- which(grepl(max(dhyper(1:x, m, n, k)), dhyper(1:x, m, n, k)))
# P value of observed
tdmr_pairwise_overlap[i,20] <- phyper(q, m, n, k, lower.tail = FALSE)
}
# Observed versus expected
tdmr_pairwise_overlap[,21] <- tdmr_pairwise_overlap[,3] - tdmr_pairwise_overlap[,19]
summary(tdmr_pairwise_overlap[,21])
## V1
## Min. : 8.0
## 1st Qu.: 131.8
## Median : 444.0
## Mean : 736.2
## 3rd Qu.:1141.8
## Max. :2976.0
m <- 51
n <- 1750509 - m
x <- 5
q = x
k = 5+20+488+13
# Expected
which(grepl(max(dhyper(1:x, m, n, k)), dhyper(1:x, m, n, k)))
## [1] 1
# P value of observed
phyper(q, m, n, k, lower.tail = FALSE)
## [1] 1.273586e-14
m <- 42
n <- 942364 - m
x <- 4
q = x
k = 10+179+9+4
# Expected
which(grepl(max(dhyper(1:x, m, n, k)), dhyper(1:x, m, n, k)))
## [1] 1
# P value of observed
phyper(q, m, n, k, lower.tail = FALSE)
## [1] 3.638879e-13
m <- 42
n <- 942364 - m
x <- 4
q = x
k = 10+179+9+4
# Expected
which(grepl(max(dhyper(1:x, m, n, k)), dhyper(1:x, m, n, k)))
## [1] 1
# P value of observed
phyper(q, m, n, k, lower.tail = FALSE)
## [1] 3.638879e-13
m <- 46+5
n <- 10000 - m
x <- 5
q = x
k = 526
# Expected
which(grepl(max(dhyper(1:x, m, n, k)), dhyper(1:x, m, n, k)))
## [1] 2
# P value of observed
phyper(q, m, n, k, lower.tail = FALSE)
## [1] 0.04994839
library(RColorBrewer)
colors <- colorRampPalette(c(brewer.pal(9, "Blues")[1],brewer.pal(9, "Blues")[9]))(100)
pal <- c(brewer.pal(9, "Set1"), brewer.pal(8, "Set2"), brewer.pal(12, "Set3"))
# RNA Seq hearts
hs = 203+42+129+30
ch = 233+75+192+59
overlap = 73+57+44+39
m <- ch+overlap
n <- 12184 - m
x <- overlap
q = x
k = hs+overlap
# Expected
which(grepl(max(dhyper(1:x, m, n, k)), dhyper(1:x, m, n, k)))
## [1] 39
# P value of observed
phyper(q, m, n, k, lower.tail = FALSE)
## [1] 1.724958e-107
pdf("test.pdf", width = 7.2, height = 7.2)
draw.pairwise.venn(hs+overlap, ch+overlap, overlap, main= "Heart specific DE genes", main.cex = 3, cat.cex = 3, cex=3.5, lty=1, fontfamily = "sans", cat.fontfamily = "sans", main.fontfamily = "sans", cat.fontface = "bold", main.fontface = "bold", cat.dist = 0.05, fill = pal[1:2])
## (polygon[GRID.polygon.11], polygon[GRID.polygon.12], polygon[GRID.polygon.13], polygon[GRID.polygon.14], text[GRID.text.15], text[GRID.text.16], text[GRID.text.17], text[GRID.text.18], text[GRID.text.19])
dev.off()
## quartz_off_screen
## 2
# First example
# RNA Seq hearts
hs = 866+495
ch = 891+549
overlap = 2304
m <- ch+overlap
n <- 12184 - m
x <- overlap
q = x
k = hs+overlap
# Expected
which(grepl(max(dhyper(1:x, m, n, k)), dhyper(1:x, m, n, k)))
## [1] 1126
# P value of observed
phyper(q, m, n, k, lower.tail = FALSE)
## [1] 0
pdf("test.pdf", width = 7.2, height = 7.2)
draw.pairwise.venn(hs+overlap, ch+overlap, overlap, main= "Heart specific DE genes", main.cex = 3, cat.cex = 3, cex=3.5, lty=1, fontfamily = "sans", cat.fontfamily = "sans", main.fontfamily = "sans", cat.fontface = "bold", main.fontface = "bold", cat.dist = 0.05, fill = pal[1:2])
## (polygon[GRID.polygon.20], polygon[GRID.polygon.21], polygon[GRID.polygon.22], polygon[GRID.polygon.23], text[GRID.text.24], text[GRID.text.25], text[GRID.text.26], text[GRID.text.27], text[GRID.text.28])
dev.off()
## quartz_off_screen
## 2
## pairwise DE
hs = 866+495
ch = 891+1227
overlap = 2304+549
m <- ch+overlap
n <- 12184 - m
x <- overlap
q = x
k = hs+overlap
# Expected
which(grepl(max(dhyper(1:x, m, n, k)), dhyper(1:x, m, n, k)))
## [1] 1719
# P value of observed
phyper(q, m, n, k, lower.tail = FALSE)
## [1] 0
pdf("test2.pdf", width = 7.2, height = 7.2)
draw.pairwise.venn(hs+overlap, ch+overlap, overlap, main= "Heart specific DE genes", main.cex = 3, cat.cex = 3, cex=3.5, lty=1, fontfamily = "sans", cat.fontfamily = "sans", main.fontfamily = "sans", cat.fontface = "bold", main.fontface = "bold", cat.dist = 0.05, fill = pal[1:2], scaled = FALSE)
## (polygon[GRID.polygon.29], polygon[GRID.polygon.30], polygon[GRID.polygon.31], polygon[GRID.polygon.32], text[GRID.text.33], text[GRID.text.34], text[GRID.text.35], text[GRID.text.36], text[GRID.text.37])
dev.off()
## quartz_off_screen
## 2
# RNA Seq hearts
hs = 203+129
ch = 233+192
hc_overlap <- 73+44
overlap = 57+39
hr_overlap = 42+30
cr_overlap = 75+59
rs = 298+253
m <- hc_overlap
n <- 12184 - m
x <- overlap
q = x
k = hr_overlap+cr_overlap+rs+overlap
# Expected
which(grepl(max(dhyper(1:x, m, n, k)), dhyper(1:x, m, n, k)))
## [1] 8
# P value of observed
phyper(q, m, n, k, lower.tail = FALSE)
## [1] 2.267179e-93
pdf("test.pdf", width = 7.2, height = 7.2)
draw.triple.venn(hs+overlap+hc_overlap+hr_overlap, ch+overlap+hc_overlap+cr_overlap, hr_overlap+cr_overlap+rs+overlap, hc_overlap+overlap, cr_overlap+overlap, hr_overlap+overlap, overlap, main= "Heart specific DE genes", main.cex = 3, cat.cex = 3, cex=3.5, lty=1, fontfamily = "sans", cat.fontfamily = "sans", main.fontfamily = "sans", cat.fontface = "bold", main.fontface = "bold", cat.dist = 0.05, fill = pal[1:3])
## (polygon[GRID.polygon.38], polygon[GRID.polygon.39], polygon[GRID.polygon.40], polygon[GRID.polygon.41], polygon[GRID.polygon.42], polygon[GRID.polygon.43], text[GRID.text.44], text[GRID.text.45], text[GRID.text.46], text[GRID.text.47], text[GRID.text.48], text[GRID.text.49], text[GRID.text.50], text[GRID.text.51], text[GRID.text.52], text[GRID.text.53])
dev.off()
## quartz_off_screen
## 2
# RNA seq hearts lenient
hs = 211
ch = 511
hc_overlap <- 181
overlap = 281
hr_overlap = 101
cr_overlap = 449
rs = 729
m <- hc_overlap
n <- 12184 - m
x <- overlap
q = x
k = hr_overlap+cr_overlap+rs+overlap
# Expected
which(grepl(max(dhyper(1:x, m, n, k)), dhyper(1:x, m, n, k)))
## [1] 23
# P value of observed
phyper(q, m, n, k, lower.tail = FALSE)
## [1] 0
pdf("test.pdf", width = 7.2, height = 7.2)
draw.triple.venn(hs+overlap+hc_overlap+hr_overlap, ch+overlap+hc_overlap+cr_overlap, hr_overlap+cr_overlap+rs+overlap, hc_overlap+overlap, cr_overlap+overlap, hr_overlap+overlap, overlap, main= "Heart specific DE genes", main.cex = 3, cat.cex = 3, cex=3.5, lty=1, fontfamily = "sans", cat.fontfamily = "sans", main.fontfamily = "sans", cat.fontface = "bold", main.fontface = "bold", cat.dist = 0.05, fill = pal[1:3])
## (polygon[GRID.polygon.54], polygon[GRID.polygon.55], polygon[GRID.polygon.56], polygon[GRID.polygon.57], polygon[GRID.polygon.58], polygon[GRID.polygon.59], text[GRID.text.60], text[GRID.text.61], text[GRID.text.62], text[GRID.text.63], text[GRID.text.64], text[GRID.text.65], text[GRID.text.66], text[GRID.text.67], text[GRID.text.68], text[GRID.text.69])
dev.off()
## quartz_off_screen
## 2
# Heart versus kidney
hs = 20459
ch = 9102
hc_overlap <- 4913+2288
overlap = 2288
hr_overlap = 2631
cr_overlap = 1426
rs = 16708
m <- hc_overlap
n <- 550861+ 1183536-m
x <- overlap
q = x
k = hr_overlap+cr_overlap+rs+overlap
which(grepl(max(dhyper(1:x, m, n, k)), dhyper(1:x, m, n, k)))
## [1] 95
# P value of observed
phyper(q, m, n, k, lower.tail = FALSE)
## [1] 0
pdf("test.pdf", width = 7.2, height = 7.2)
draw.triple.venn(hs+overlap+hc_overlap+hr_overlap, ch+overlap+hc_overlap+cr_overlap, hr_overlap+cr_overlap+rs+overlap, hc_overlap+overlap, cr_overlap+overlap, hr_overlap+overlap, overlap, main= "Heart specific DE genes", main.cex = 3, cat.cex = 3, cex=3.5, lty=1, fontfamily = "sans", cat.fontfamily = "sans", main.fontfamily = "sans", cat.fontface = "bold", main.fontface = "bold", cat.dist = 0.05, fill = pal[1:3])
## (polygon[GRID.polygon.70], polygon[GRID.polygon.71], polygon[GRID.polygon.72], polygon[GRID.polygon.73], polygon[GRID.polygon.74], polygon[GRID.polygon.75], text[GRID.text.76], text[GRID.text.77], text[GRID.text.78], text[GRID.text.79], text[GRID.text.80], text[GRID.text.81], text[GRID.text.82], text[GRID.text.83], text[GRID.text.84], text[GRID.text.85])
dev.off()
## quartz_off_screen
## 2
hs = 247
ch = 345
hc_overlap <- 159
overlap = 106
hr_overlap = 105
cr_overlap = 162
rs = 480
m <- hc_overlap
n <- 12184-m
x <- overlap
q = x
k = hr_overlap+cr_overlap+rs+overlap
which(grepl(max(dhyper(1:x, m, n, k)), dhyper(1:x, m, n, k)))
## [1] 11
# P value of observed
phyper(q, m, n, k, lower.tail = FALSE)
## [1] 4.614702e-86
pdf("test.pdf", width = 7.2, height = 7.2)
draw.triple.venn(hs+overlap+hc_overlap+hr_overlap, ch+overlap+hc_overlap+cr_overlap, hr_overlap+cr_overlap+rs+overlap, hc_overlap+overlap, cr_overlap+overlap, hr_overlap+overlap, overlap, main= "Heart specific DE genes", main.cex = 3, cat.cex = 3, cex=3.5, lty=1, fontfamily = "sans", cat.fontfamily = "sans", main.fontfamily = "sans", cat.fontface = "bold", main.fontface = "bold", cat.dist = 0.05, fill = pal[1:3])
## (polygon[GRID.polygon.86], polygon[GRID.polygon.87], polygon[GRID.polygon.88], polygon[GRID.polygon.89], polygon[GRID.polygon.90], polygon[GRID.polygon.91], text[GRID.text.92], text[GRID.text.93], text[GRID.text.94], text[GRID.text.95], text[GRID.text.96], text[GRID.text.97], text[GRID.text.98], text[GRID.text.99], text[GRID.text.100], text[GRID.text.101])
dev.off()
## quartz_off_screen
## 2
# Heart versus kidney
hs = 204
ch = 9102
hc_overlap <- 4913+2288
overlap = 2288
hr_overlap = 2631
cr_overlap = 1426
rs = 16708
m <- hc_overlap
n <- 550861+ 1183536-m
x <- overlap
q = x
k = hr_overlap+cr_overlap+rs+overlap
which(grepl(max(dhyper(1:x, m, n, k)), dhyper(1:x, m, n, k)))
## [1] 95
# P value of observed
phyper(q, m, n, k, lower.tail = FALSE)
## [1] 0
# Heart versus kidney
hs = 319+2094
ch = 198+1014
hc_overlap <- 38+351
overlap = 4+62
hr_overlap = 9+92
cr_overlap = 10+66
rs = 179+1175
m <- hc_overlap
n <- 550861+ 1183536
x <- overlap
q = x
k = hr_overlap+cr_overlap+rs+overlap
which(grepl(max(dhyper(1:x, m, n, k)), dhyper(1:x, m, n, k)))
## [1] 1
# P value of observed
phyper(q, m, n, k, lower.tail = FALSE)
## [1] 1.596914e-128
pdf("test.pdf", width = 7.2, height = 7.2)
draw.triple.venn(hs+overlap+hc_overlap+hr_overlap, ch+overlap+hc_overlap+cr_overlap, hr_overlap+cr_overlap+rs+overlap, hc_overlap+overlap, cr_overlap+overlap, hr_overlap+overlap, overlap, main= "Heart specific DE genes", main.cex = 3, cat.cex = 3, cex=3.5, lty=1, fontfamily = "sans", cat.fontfamily = "sans", main.fontfamily = "sans", cat.fontface = "bold", main.fontface = "bold", cat.dist = 0.05, fill = pal[1:3])
## (polygon[GRID.polygon.102], polygon[GRID.polygon.103], polygon[GRID.polygon.104], polygon[GRID.polygon.105], polygon[GRID.polygon.106], polygon[GRID.polygon.107], text[GRID.text.108], text[GRID.text.109], text[GRID.text.110], text[GRID.text.111], text[GRID.text.112], text[GRID.text.113], text[GRID.text.114], text[GRID.text.115], text[GRID.text.116], text[GRID.text.117])
dev.off()
## quartz_off_screen
## 2
# Heart versus lung
hs = 3752+7319
ch = 2196+3287
hc_overlap <- 625+130+1446+322
overlap = 130+322
hr_overlap = 223+401
cr_overlap = 196+332
rs = 2409+3013
m <- hc_overlap
n <- 550861+ 1183536
x <- overlap
q = x
k = hr_overlap+cr_overlap+rs+overlap
which(grepl(max(dhyper(1:x, m, n, k)), dhyper(1:x, m, n, k)))
## [1] 10
# P value of observed
phyper(q, m, n, k, lower.tail = FALSE)
## [1] 0