Introduction

Apply the hypergeometric distribution to evaluate the significance of conserved tDMRs.

library("gplots")
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
library("ggplot2")
## Warning: package 'ggplot2' was built under R version 3.4.4
library("RColorBrewer")
library("scales")
## Warning: package 'scales' was built under R version 3.4.4
library("edgeR")
## Loading required package: limma
library("R.utils")
## Warning: package 'R.utils' was built under R version 3.4.4
## Loading required package: R.oo
## Warning: package 'R.oo' was built under R version 3.4.4
## Loading required package: R.methodsS3
## R.methodsS3 v1.7.1 (2016-02-15) successfully loaded. See ?R.methodsS3 for help.
## R.oo v1.22.0 (2018-04-21) successfully loaded. See ?R.oo for help.
## 
## Attaching package: 'R.oo'
## The following objects are masked from 'package:methods':
## 
##     getClasses, getMethods
## The following objects are masked from 'package:base':
## 
##     attach, detach, gc, load, save
## R.utils v2.7.0 successfully loaded. See ?R.utils for help.
## 
## Attaching package: 'R.utils'
## The following object is masked from 'package:utils':
## 
##     timestamp
## The following objects are masked from 'package:base':
## 
##     cat, commandArgs, getOption, inherits, isOpen, parse, warnings
library("plyr")
library("limma")
library("gridExtra")
library("VennDiagram")
## Warning: package 'VennDiagram' was built under R version 3.4.4
## Loading required package: grid
## Loading required package: futile.logger
source("functions.R")
library(ashr)
library(cowplot)
## Warning: package 'cowplot' was built under R version 3.4.4
## 
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggplot2':
## 
##     ggsave
tdmr_pairwise_overlap <- read.csv("../data/tDMR_overlap_data.csv")

Function for the human-chimpanzee tDMR overlap

# Larger population- chimpanzees

# Add columns for results
tdmr_pairwise_overlap[,13] <- array(NA, dim = c(nrow(tdmr_pairwise_overlap),1))
tdmr_pairwise_overlap[,14] <- array(NA, dim = c(nrow(tdmr_pairwise_overlap),1))

for (i in 1:nrow(tdmr_pairwise_overlap)){
  m <- tdmr_pairwise_overlap[i,3] + tdmr_pairwise_overlap[i,5] + tdmr_pairwise_overlap[i,6] + tdmr_pairwise_overlap[i,7]
  n <- tdmr_pairwise_overlap[i,11] - m
  x <- tdmr_pairwise_overlap[i,3] + tdmr_pairwise_overlap[i,5]
  q = x
  k = tdmr_pairwise_overlap[i,3] + tdmr_pairwise_overlap[i,4]+tdmr_pairwise_overlap[i,5]+tdmr_pairwise_overlap[i,9]

# Expected

  tdmr_pairwise_overlap[i,13] <- which(grepl(max(dhyper(1:x, m, n, k)), dhyper(1:x, m, n, k)))

# P value of observed
  tdmr_pairwise_overlap[i,14] <- phyper(q, m, n, k, lower.tail = FALSE)
}




# When the larger population is the humans

# Add columns for results
tdmr_pairwise_overlap[,15] <- array(NA, dim = c(nrow(tdmr_pairwise_overlap),1))
tdmr_pairwise_overlap[,16] <- array(NA, dim = c(nrow(tdmr_pairwise_overlap),1))

for (i in 1:nrow(tdmr_pairwise_overlap)){
  m <- tdmr_pairwise_overlap[i,3] + tdmr_pairwise_overlap[i,4]+tdmr_pairwise_overlap[i,5]+tdmr_pairwise_overlap[i,9]
  n <- tdmr_pairwise_overlap[i,12] - m
  x <- tdmr_pairwise_overlap[i,3] + tdmr_pairwise_overlap[i,5]
  q = x
  k = tdmr_pairwise_overlap[i,3] + tdmr_pairwise_overlap[i,5] + tdmr_pairwise_overlap[i,6] + tdmr_pairwise_overlap[i,7]
  
# Expected

  tdmr_pairwise_overlap[i,15] <- which(grepl(max(dhyper(1:x, m, n, k)), dhyper(1:x, m, n, k)))

# P value of observed
  tdmr_pairwise_overlap[i,16] <- phyper(q, m, n, k, lower.tail = FALSE)
}

Human-chimpanzee-rhesus macaque tDMR overlap

# Chimpanzees as larger

# Add columns for results
tdmr_pairwise_overlap[,17] <- array(NA, dim = c(nrow(tdmr_pairwise_overlap),1))
tdmr_pairwise_overlap[,18] <- array(NA, dim = c(nrow(tdmr_pairwise_overlap),1))

for (i in 1:nrow(tdmr_pairwise_overlap)){
  m <- tdmr_pairwise_overlap[i,10]
  n <- tdmr_pairwise_overlap[i,11] - m
  x <- tdmr_pairwise_overlap[i,3] 
  q = x
  k =  tdmr_pairwise_overlap[i,3] + tdmr_pairwise_overlap[i,7] + tdmr_pairwise_overlap[i,8] + tdmr_pairwise_overlap[i,9]

# Expected

  tdmr_pairwise_overlap[i,17] <- which(grepl(max(dhyper(1:x, m, n, k)), dhyper(1:x, m, n, k)))

# P value of observed
  tdmr_pairwise_overlap[i,18] <- phyper(q, m, n, k, lower.tail = FALSE)
}


# Humans as larger

# Add columns for results
tdmr_pairwise_overlap[,19] <- array(NA, dim = c(nrow(tdmr_pairwise_overlap),1))
tdmr_pairwise_overlap[,20] <- array(NA, dim = c(nrow(tdmr_pairwise_overlap),1))

for (i in 1:nrow(tdmr_pairwise_overlap)){
  m <- tdmr_pairwise_overlap[i,10]
  n <- tdmr_pairwise_overlap[i,12] - m
  x <- tdmr_pairwise_overlap[i,3] 
  q = x
  k =  tdmr_pairwise_overlap[i,3] + tdmr_pairwise_overlap[i,7] + tdmr_pairwise_overlap[i,8] + tdmr_pairwise_overlap[i,9]

# Expected

  tdmr_pairwise_overlap[i,19] <- which(grepl(max(dhyper(1:x, m, n, k)), dhyper(1:x, m, n, k)))

# P value of observed
  tdmr_pairwise_overlap[i,20] <- phyper(q, m, n, k, lower.tail = FALSE)
}

# Observed versus expected

tdmr_pairwise_overlap[,21] <- tdmr_pairwise_overlap[,3] - tdmr_pairwise_overlap[,19]

summary(tdmr_pairwise_overlap[,21])
##        V1        
##  Min.   :   8.0  
##  1st Qu.: 131.8  
##  Median : 444.0  
##  Mean   : 736.2  
##  3rd Qu.:1141.8  
##  Max.   :2976.0
m <- 51
  n <- 1750509 - m
  x <- 5 
  q = x
  k =  5+20+488+13

# Expected

 which(grepl(max(dhyper(1:x, m, n, k)), dhyper(1:x, m, n, k)))
## [1] 1
# P value of observed
phyper(q, m, n, k, lower.tail = FALSE)
## [1] 1.273586e-14
m <- 42
  n <- 942364 - m
  x <- 4 
  q = x
  k =  10+179+9+4

# Expected

 which(grepl(max(dhyper(1:x, m, n, k)), dhyper(1:x, m, n, k)))
## [1] 1
# P value of observed
phyper(q, m, n, k, lower.tail = FALSE)
## [1] 3.638879e-13
m <- 42
  n <- 942364 - m
  x <- 4 
  q = x
  k =  10+179+9+4

# Expected

 which(grepl(max(dhyper(1:x, m, n, k)), dhyper(1:x, m, n, k)))
## [1] 1
# P value of observed
phyper(q, m, n, k, lower.tail = FALSE)
## [1] 3.638879e-13
m <- 46+5
  n <- 10000 - m
  x <- 5
  q = x
  k =  526

# Expected

 which(grepl(max(dhyper(1:x, m, n, k)), dhyper(1:x, m, n, k)))
## [1] 2
# P value of observed
phyper(q, m, n, k, lower.tail = FALSE)
## [1] 0.04994839

Heart specific

library(RColorBrewer)
colors <- colorRampPalette(c(brewer.pal(9, "Blues")[1],brewer.pal(9, "Blues")[9]))(100)
pal <- c(brewer.pal(9, "Set1"), brewer.pal(8, "Set2"), brewer.pal(12, "Set3"))


# RNA Seq hearts

hs = 203+42+129+30
ch = 233+75+192+59
overlap = 73+57+44+39

 m <- ch+overlap
  n <- 12184 - m
  x <- overlap
  q = x
  k = hs+overlap

# Expected

 which(grepl(max(dhyper(1:x, m, n, k)), dhyper(1:x, m, n, k)))
## [1] 39
# P value of observed
phyper(q, m, n, k, lower.tail = FALSE)
## [1] 1.724958e-107
pdf("test.pdf", width = 7.2, height = 7.2)
draw.pairwise.venn(hs+overlap, ch+overlap, overlap,  main= "Heart specific DE genes", main.cex = 3, cat.cex = 3, cex=3.5, lty=1, fontfamily = "sans", cat.fontfamily = "sans", main.fontfamily = "sans", cat.fontface = "bold", main.fontface = "bold", cat.dist = 0.05, fill = pal[1:2])
## (polygon[GRID.polygon.11], polygon[GRID.polygon.12], polygon[GRID.polygon.13], polygon[GRID.polygon.14], text[GRID.text.15], text[GRID.text.16], text[GRID.text.17], text[GRID.text.18], text[GRID.text.19])
dev.off()
## quartz_off_screen 
##                 2
# First example
# RNA Seq hearts

hs = 866+495
ch = 891+549
overlap = 2304

 m <- ch+overlap
  n <- 12184 - m
  x <- overlap
  q = x
  k = hs+overlap

# Expected

 which(grepl(max(dhyper(1:x, m, n, k)), dhyper(1:x, m, n, k)))
## [1] 1126
# P value of observed
phyper(q, m, n, k, lower.tail = FALSE)
## [1] 0
pdf("test.pdf", width = 7.2, height = 7.2)
draw.pairwise.venn(hs+overlap, ch+overlap, overlap,  main= "Heart specific DE genes", main.cex = 3, cat.cex = 3, cex=3.5, lty=1, fontfamily = "sans", cat.fontfamily = "sans", main.fontfamily = "sans", cat.fontface = "bold", main.fontface = "bold", cat.dist = 0.05, fill = pal[1:2])
## (polygon[GRID.polygon.20], polygon[GRID.polygon.21], polygon[GRID.polygon.22], polygon[GRID.polygon.23], text[GRID.text.24], text[GRID.text.25], text[GRID.text.26], text[GRID.text.27], text[GRID.text.28])
dev.off()
## quartz_off_screen 
##                 2
## pairwise DE 

hs = 866+495
ch = 891+1227
overlap = 2304+549

 m <- ch+overlap
  n <- 12184 - m
  x <- overlap
  q = x
  k = hs+overlap

# Expected

 which(grepl(max(dhyper(1:x, m, n, k)), dhyper(1:x, m, n, k)))
## [1] 1719
# P value of observed
phyper(q, m, n, k, lower.tail = FALSE)
## [1] 0
pdf("test2.pdf", width = 7.2, height = 7.2)
draw.pairwise.venn(hs+overlap, ch+overlap, overlap,  main= "Heart specific DE genes", main.cex = 3, cat.cex = 3, cex=3.5, lty=1, fontfamily = "sans", cat.fontfamily = "sans", main.fontfamily = "sans", cat.fontface = "bold", main.fontface = "bold", cat.dist = 0.05, fill = pal[1:2], scaled = FALSE)
## (polygon[GRID.polygon.29], polygon[GRID.polygon.30], polygon[GRID.polygon.31], polygon[GRID.polygon.32], text[GRID.text.33], text[GRID.text.34], text[GRID.text.35], text[GRID.text.36], text[GRID.text.37])
dev.off()
## quartz_off_screen 
##                 2
# RNA Seq hearts

hs = 203+129
ch = 233+192
hc_overlap <- 73+44
overlap = 57+39
hr_overlap = 42+30
cr_overlap = 75+59
rs = 298+253

 m <- hc_overlap
  n <- 12184 - m
  x <- overlap
  q = x
  k = hr_overlap+cr_overlap+rs+overlap

# Expected

 which(grepl(max(dhyper(1:x, m, n, k)), dhyper(1:x, m, n, k)))
## [1] 8
# P value of observed
phyper(q, m, n, k, lower.tail = FALSE)
## [1] 2.267179e-93
pdf("test.pdf", width = 7.2, height = 7.2)
draw.triple.venn(hs+overlap+hc_overlap+hr_overlap, ch+overlap+hc_overlap+cr_overlap, hr_overlap+cr_overlap+rs+overlap, hc_overlap+overlap, cr_overlap+overlap, hr_overlap+overlap, overlap,  main= "Heart specific DE genes", main.cex = 3, cat.cex = 3, cex=3.5, lty=1, fontfamily = "sans", cat.fontfamily = "sans", main.fontfamily = "sans", cat.fontface = "bold", main.fontface = "bold", cat.dist = 0.05, fill = pal[1:3])
## (polygon[GRID.polygon.38], polygon[GRID.polygon.39], polygon[GRID.polygon.40], polygon[GRID.polygon.41], polygon[GRID.polygon.42], polygon[GRID.polygon.43], text[GRID.text.44], text[GRID.text.45], text[GRID.text.46], text[GRID.text.47], text[GRID.text.48], text[GRID.text.49], text[GRID.text.50], text[GRID.text.51], text[GRID.text.52], text[GRID.text.53])
dev.off()
## quartz_off_screen 
##                 2
# RNA seq hearts lenient

hs = 211
ch = 511
hc_overlap <- 181
overlap = 281
hr_overlap = 101
cr_overlap = 449
rs = 729

 m <- hc_overlap
  n <- 12184 - m
  x <- overlap
  q = x
  k = hr_overlap+cr_overlap+rs+overlap

# Expected

 which(grepl(max(dhyper(1:x, m, n, k)), dhyper(1:x, m, n, k)))
## [1] 23
# P value of observed
phyper(q, m, n, k, lower.tail = FALSE)
## [1] 0
pdf("test.pdf", width = 7.2, height = 7.2)
draw.triple.venn(hs+overlap+hc_overlap+hr_overlap, ch+overlap+hc_overlap+cr_overlap, hr_overlap+cr_overlap+rs+overlap, hc_overlap+overlap, cr_overlap+overlap, hr_overlap+overlap, overlap,  main= "Heart specific DE genes", main.cex = 3, cat.cex = 3, cex=3.5, lty=1, fontfamily = "sans", cat.fontfamily = "sans", main.fontfamily = "sans", cat.fontface = "bold", main.fontface = "bold", cat.dist = 0.05, fill = pal[1:3])
## (polygon[GRID.polygon.54], polygon[GRID.polygon.55], polygon[GRID.polygon.56], polygon[GRID.polygon.57], polygon[GRID.polygon.58], polygon[GRID.polygon.59], text[GRID.text.60], text[GRID.text.61], text[GRID.text.62], text[GRID.text.63], text[GRID.text.64], text[GRID.text.65], text[GRID.text.66], text[GRID.text.67], text[GRID.text.68], text[GRID.text.69])
dev.off()
## quartz_off_screen 
##                 2
# Heart versus kidney

hs = 20459
ch = 9102
hc_overlap <- 4913+2288
overlap = 2288
hr_overlap = 2631
cr_overlap = 1426
rs = 16708

 m <- hc_overlap
  n <- 550861+ 1183536-m
  x <- overlap
  q = x
  k = hr_overlap+cr_overlap+rs+overlap

   which(grepl(max(dhyper(1:x, m, n, k)), dhyper(1:x, m, n, k)))
## [1] 95
# P value of observed
phyper(q, m, n, k, lower.tail = FALSE)
## [1] 0
pdf("test.pdf", width = 7.2, height = 7.2)
draw.triple.venn(hs+overlap+hc_overlap+hr_overlap, ch+overlap+hc_overlap+cr_overlap, hr_overlap+cr_overlap+rs+overlap, hc_overlap+overlap, cr_overlap+overlap, hr_overlap+overlap, overlap,  main= "Heart specific DE genes", main.cex = 3, cat.cex = 3, cex=3.5, lty=1, fontfamily = "sans", cat.fontfamily = "sans", main.fontfamily = "sans", cat.fontface = "bold", main.fontface = "bold", cat.dist = 0.05, fill = pal[1:3])
## (polygon[GRID.polygon.70], polygon[GRID.polygon.71], polygon[GRID.polygon.72], polygon[GRID.polygon.73], polygon[GRID.polygon.74], polygon[GRID.polygon.75], text[GRID.text.76], text[GRID.text.77], text[GRID.text.78], text[GRID.text.79], text[GRID.text.80], text[GRID.text.81], text[GRID.text.82], text[GRID.text.83], text[GRID.text.84], text[GRID.text.85])
dev.off()
## quartz_off_screen 
##                 2
hs = 247
ch = 345
hc_overlap <- 159
overlap = 106
hr_overlap = 105
cr_overlap = 162
rs = 480

 m <- hc_overlap
  n <- 12184-m
  x <- overlap
  q = x
  k = hr_overlap+cr_overlap+rs+overlap

   which(grepl(max(dhyper(1:x, m, n, k)), dhyper(1:x, m, n, k)))
## [1] 11
# P value of observed
phyper(q, m, n, k, lower.tail = FALSE)
## [1] 4.614702e-86
pdf("test.pdf", width = 7.2, height = 7.2)
draw.triple.venn(hs+overlap+hc_overlap+hr_overlap, ch+overlap+hc_overlap+cr_overlap, hr_overlap+cr_overlap+rs+overlap, hc_overlap+overlap, cr_overlap+overlap, hr_overlap+overlap, overlap,  main= "Heart specific DE genes", main.cex = 3, cat.cex = 3, cex=3.5, lty=1, fontfamily = "sans", cat.fontfamily = "sans", main.fontfamily = "sans", cat.fontface = "bold", main.fontface = "bold", cat.dist = 0.05, fill = pal[1:3])
## (polygon[GRID.polygon.86], polygon[GRID.polygon.87], polygon[GRID.polygon.88], polygon[GRID.polygon.89], polygon[GRID.polygon.90], polygon[GRID.polygon.91], text[GRID.text.92], text[GRID.text.93], text[GRID.text.94], text[GRID.text.95], text[GRID.text.96], text[GRID.text.97], text[GRID.text.98], text[GRID.text.99], text[GRID.text.100], text[GRID.text.101])
dev.off()
## quartz_off_screen 
##                 2
# Heart versus kidney

hs = 204
ch = 9102
hc_overlap <- 4913+2288
overlap = 2288
hr_overlap = 2631
cr_overlap = 1426
rs = 16708

 m <- hc_overlap
  n <- 550861+ 1183536-m
  x <- overlap
  q = x
  k = hr_overlap+cr_overlap+rs+overlap

   which(grepl(max(dhyper(1:x, m, n, k)), dhyper(1:x, m, n, k)))
## [1] 95
# P value of observed
phyper(q, m, n, k, lower.tail = FALSE)
## [1] 0
# Heart versus kidney

hs = 319+2094
ch = 198+1014
hc_overlap <- 38+351
overlap = 4+62
hr_overlap = 9+92
cr_overlap = 10+66
rs = 179+1175

 m <- hc_overlap
  n <- 550861+ 1183536
  x <- overlap
  q = x
  k = hr_overlap+cr_overlap+rs+overlap

   which(grepl(max(dhyper(1:x, m, n, k)), dhyper(1:x, m, n, k)))
## [1] 1
# P value of observed
phyper(q, m, n, k, lower.tail = FALSE)
## [1] 1.596914e-128
pdf("test.pdf", width = 7.2, height = 7.2)
draw.triple.venn(hs+overlap+hc_overlap+hr_overlap, ch+overlap+hc_overlap+cr_overlap, hr_overlap+cr_overlap+rs+overlap, hc_overlap+overlap, cr_overlap+overlap, hr_overlap+overlap, overlap,  main= "Heart specific DE genes", main.cex = 3, cat.cex = 3, cex=3.5, lty=1, fontfamily = "sans", cat.fontfamily = "sans", main.fontfamily = "sans", cat.fontface = "bold", main.fontface = "bold", cat.dist = 0.05, fill = pal[1:3])
## (polygon[GRID.polygon.102], polygon[GRID.polygon.103], polygon[GRID.polygon.104], polygon[GRID.polygon.105], polygon[GRID.polygon.106], polygon[GRID.polygon.107], text[GRID.text.108], text[GRID.text.109], text[GRID.text.110], text[GRID.text.111], text[GRID.text.112], text[GRID.text.113], text[GRID.text.114], text[GRID.text.115], text[GRID.text.116], text[GRID.text.117])
dev.off()
## quartz_off_screen 
##                 2
# Heart versus lung
hs = 3752+7319
ch = 2196+3287
hc_overlap <- 625+130+1446+322
overlap = 130+322
hr_overlap = 223+401
cr_overlap = 196+332
rs = 2409+3013

 m <- hc_overlap
  n <- 550861+ 1183536
  x <- overlap
  q = x
  k = hr_overlap+cr_overlap+rs+overlap

   which(grepl(max(dhyper(1:x, m, n, k)), dhyper(1:x, m, n, k)))
## [1] 10
# P value of observed
phyper(q, m, n, k, lower.tail = FALSE)
## [1] 0