# Load libraries
library("edgeR")
## Loading required package: limma
library("limma")
library("plyr")
library("ashr")
## Warning: package 'ashr' was built under R version 3.4.4
library("cowplot")
## Warning: package 'cowplot' was built under R version 3.4.4
## Loading required package: ggplot2
## 
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggplot2':
## 
##     ggsave
library("vashr")
## Loading required package: SQUAREM
## Loading required package: qvalue
library("devtools")
#devtools::install_github("jhsiao999/mediation/pkg")
library("medinome")

library("ggplot2")
library("qvalue")
library("RColorBrewer")
library("topGO")
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following object is masked from 'package:limma':
## 
##     plotMA
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, cbind, colMeans,
##     colnames, colSums, do.call, duplicated, eval, evalq, Filter,
##     Find, get, grep, grepl, intersect, is.unsorted, lapply,
##     lengths, Map, mapply, match, mget, order, paste, pmax,
##     pmax.int, pmin, pmin.int, Position, rank, rbind, Reduce,
##     rowMeans, rownames, rowSums, sapply, setdiff, sort, table,
##     tapply, union, unique, unsplit, which, which.max, which.min
## Loading required package: graph
## 
## Attaching package: 'graph'
## The following object is masked from 'package:plyr':
## 
##     join
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: GO.db
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: IRanges
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:plyr':
## 
##     rename
## The following object is masked from 'package:base':
## 
##     expand.grid
## 
## Attaching package: 'IRanges'
## The following object is masked from 'package:plyr':
## 
##     desc
## 
## Loading required package: SparseM
## 
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
## 
##     backsolve
## 
## groupGOTerms:    GOBPTerm, GOMFTerm, GOCCTerm environments built.
## 
## Attaching package: 'topGO'
## The following object is masked from 'package:IRanges':
## 
##     members
library("clusterProfiler")
## Loading required package: DOSE
## DOSE v3.4.0  For help: https://guangchuangyu.github.io/DOSE
## 
## If you use DOSE in published research, please cite:
## Guangchuang Yu, Li-Gen Wang, Guang-Rong Yan, Qing-Yu He. DOSE: an R/Bioconductor package for Disease Ontology Semantic and Enrichment analysis. Bioinformatics 2015, 31(4):608-609
## clusterProfiler v3.6.0  For help: https://guangchuangyu.github.io/clusterProfiler
## 
## If you use clusterProfiler in published research, please cite:
## Guangchuang Yu., Li-Gen Wang, Yanyan Han, Qing-Yu He. clusterProfiler: an R package for comparing biological themes among gene clusters. OMICS: A Journal of Integrative Biology. 2012, 16(5):284-287.
library("org.Hs.eg.db")
## 
library(tidyverse)
## ── Attaching packages ────────────────────────────────── tidyverse 1.2.1 ──
## ✔ tibble  2.1.1       ✔ purrr   0.3.2  
## ✔ tidyr   0.8.3       ✔ dplyr   0.8.0.1
## ✔ readr   1.3.1       ✔ stringr 1.4.0  
## ✔ tibble  2.1.1       ✔ forcats 0.4.0
## Warning: package 'tibble' was built under R version 3.4.4
## Warning: package 'tidyr' was built under R version 3.4.4
## Warning: package 'readr' was built under R version 3.4.4
## Warning: package 'purrr' was built under R version 3.4.4
## Warning: package 'dplyr' was built under R version 3.4.4
## Warning: package 'stringr' was built under R version 3.4.4
## Warning: package 'forcats' was built under R version 3.4.4
## ── Conflicts ───────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::arrange()         masks plyr::arrange()
## ✖ stringr::boundary()      masks graph::boundary()
## ✖ dplyr::collapse()        masks IRanges::collapse()
## ✖ dplyr::combine()         masks Biobase::combine(), BiocGenerics::combine()
## ✖ purrr::compact()         masks plyr::compact()
## ✖ dplyr::count()           masks plyr::count()
## ✖ dplyr::desc()            masks IRanges::desc(), plyr::desc()
## ✖ tidyr::expand()          masks S4Vectors::expand()
## ✖ dplyr::failwith()        masks plyr::failwith()
## ✖ dplyr::filter()          masks stats::filter()
## ✖ dplyr::first()           masks S4Vectors::first()
## ✖ cowplot::ggsave()        masks ggplot2::ggsave()
## ✖ dplyr::id()              masks plyr::id()
## ✖ dplyr::lag()             masks stats::lag()
## ✖ dplyr::mutate()          masks plyr::mutate()
## ✖ BiocGenerics::Position() masks ggplot2::Position(), base::Position()
## ✖ purrr::reduce()          masks IRanges::reduce()
## ✖ dplyr::rename()          masks S4Vectors::rename(), plyr::rename()
## ✖ dplyr::select()          masks AnnotationDbi::select()
## ✖ purrr::simplify()        masks clusterProfiler::simplify()
## ✖ dplyr::slice()           masks IRanges::slice()
## ✖ dplyr::summarise()       masks plyr::summarise()
## ✖ dplyr::summarize()       masks plyr::summarize()
library(data.table)
## Warning: package 'data.table' was built under R version 3.4.4
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
## The following object is masked from 'package:purrr':
## 
##     transpose
## The following object is masked from 'package:IRanges':
## 
##     shift
## The following objects are masked from 'package:S4Vectors':
## 
##     first, second
library("dplyr")

Human heart-kidney conserved DE

getwd()
## [1] "/Users/laurenblake/Desktop/Regulatory_Evol/ashlar-trial/analysis"
# import sample labels
samples <- read.delim("../../../Reg_Evo_Primates/data/Sample_info_RNAseq_RIN.txt")
RIN <- samples$RIN
# expression
exprs <- read.table("../../../Reg_Evo_Primates/data/human_chimp_orth_exp_methyl_7725_hum.txt", sep="")
# methylation data
methyl <- read.csv("../../../Reg_Evo_Primates/data/chimp_human_orth_7725_avg_methyl_per_ts_gene.txt", sep="")

# Normalized gene expression data
cpm.voom.cyclic <- readRDS("../../../Reg_Evo_Primates/data/human_chimp_orth_cpm_voom_cyclic.rds")

Set FDR level/svalue

FDR_level <- 0.05
FDR_add <- 0.10

Human heart-kidney

# Check parameters

human_chimp_heart <- c(17, 20, 21, 24, 25, 28, 29)

# Methylation
hc_exprs <- exprs[,2:48]
exprs_methyl <- exprs[,49:79]

# HC heart only


hc_exprs_heart <- hc_exprs[,human_chimp_heart]


methyl <- exprs_methyl[,human_chimp_heart]
rownames(hc_exprs_heart) <- rownames(exprs)
rownames(methyl) <- rownames(exprs)


Y <- hc_exprs_heart
two_species <- samples$Tissue[human_chimp_heart]
X <- droplevels.factor(two_species)
M <- methyl
RIN_subset <- RIN[human_chimp_heart]



check_values <- mediate.test.regressing(Y, X, M, RIN_subset)
## Warning: package 'assertthat' was built under R version 3.4.4
## 
## Attaching package: 'assertthat'
## The following object is masked from 'package:tibble':
## 
##     has_name
fit <- vash(check_values$d_se,df=5, singlecomp = T, unimodal = "auto") 
fit_ash_normal <- ash(as.vector(check_values$d), fit$sd.post, mode = 0, mixcompdist = "normal")
summary(fit_ash_normal$result$svalue < 0.05)
##    Mode   FALSE    TRUE 
## logical    7603     122
# Run the linear model in limma
  design_1 <- model.matrix(~ X + RIN_subset)
  fit_all <- lmFit(cpm.voom.cyclic[,human_chimp_heart], design_1)
  fit_all <- eBayes(fit_all)

# Get results
  HvC_Heart_fit_all = topTable(fit_all, coef=2, adjust="BH", number=Inf, sort.by="none")
  HvC_Heart_fit_all_5perc <-
HvC_Heart_fit_all[which(HvC_Heart_fit_all$adj.P.Val < FDR_level), ]
  
 human_chimp_heart1 <-  rownames(methyl) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(methyl, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:7]
M <- counts_human_chimp_heart_subset

human_chimp_heart1 <-  rownames(hc_exprs_heart) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(hc_exprs_heart, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:7]
Y <- counts_human_chimp_heart_subset

check_values <- mediate.test.regressing(Y, X, M, RIN_subset)

fit <- vash(check_values$d_se,df=5, singlecomp = T, unimodal = "auto") 
fit_ash <- ash(as.vector(check_values$d), fit$sd.post, mode = 0, mixcompdist = "normal")

calc <- as.array(summary(fit_ash$result$svalue < FDR_level))
calc2 <- as.numeric(calc[[2]])

(nrow(Y)-calc2)/nrow(Y)
## [1] 0.134471
# Species no DE
species_no_DE <- c(1, 2, 5, 6, 9, 10, 13, 14)

species_no_DE_exprs <- hc_exprs[,species_no_DE]

species_no_DE_methyl <- exprs_methyl[,species_no_DE]

two_species <- samples$Tissue[species_no_DE]
no_DE_tissue <- droplevels.factor(two_species)

no_DE_RIN <- RIN[species_no_DE]

design <- model.matrix(~ as.factor(no_DE_tissue) + no_DE_RIN)
  fit_species_no_DE <- lmFit(cpm.voom.cyclic[,species_no_DE], design)
  fit_species_no_DE <- eBayes(fit_species_no_DE)

  HvC_Heart_fit_species_no_DE = topTable(fit_species_no_DE, coef=2, adjust="BH", number=Inf,
sort.by="none")

# Species DE 
  design_1 <- model.matrix(~ X + RIN_subset)
  fit_all <- lmFit(cpm.voom.cyclic[,human_chimp_heart], design_1)
  fit_all <- eBayes(fit_all)

# Get results
  HvC_Heart_fit_all = topTable(fit_all, coef=2, adjust="BH", number=Inf, sort.by="none")
  
  HvC_Heart_fit_species_no_DE = cbind(HvC_Heart_fit_species_no_DE, HvC_Heart_fit_all)
  
 
      HvC_Heart_fit_all_5perc <- HvC_Heart_fit_species_no_DE[which(HvC_Heart_fit_species_no_DE[,6] <  FDR_level & HvC_Heart_fit_species_no_DE[,13] < FDR_level), ]

  
   human_chimp_heart1 <-  rownames(methyl) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(methyl, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:7]
M <- counts_human_chimp_heart_subset

human_chimp_heart1 <-  rownames(hc_exprs_heart) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(hc_exprs_heart, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:7]
Y <- counts_human_chimp_heart_subset

check_values <- mediate.test.regressing(Y, X, M, RIN_subset)

fit <- vash(check_values$d_se,df=5, singlecomp = T, unimodal = "auto") 
fit_ash <- ash(as.vector(check_values$d), fit$sd.post, mode = 0, mixcompdist = "normal")

ind_siga <- (fit_ash$result$svalue < 0.05)
ind_siga[ind_siga == TRUE] <- "red"
ind_siga[ind_siga == FALSE] <- "black"

combine_dataa <- as.data.frame(cbind(fit_ash$result$betahat, fit_ash$result$sebetahat, ind_siga), stringsAsFactors = FALSE)

combine_dataa[,1] <- as.numeric(combine_dataa[,1])
combine_dataa[,2] <- as.numeric(combine_dataa[,2])
colnames(combine_dataa) <- c("Effect", "SE", "ind_siga")

h_cons <- ggplot(combine_dataa, aes(combine_dataa$Effect, combine_dataa$SE)) + geom_point(color = c(combine_dataa$ind_siga)) + ylab("Standard error") + xlab("Effect size difference") + ylim(0,5.5) + xlim(-8, 8)

Human heart-kidney human specific DE

# Species no DE
species_no_DE <- c(1, 2, 5, 6, 9, 10, 13, 14)

species_no_DE_exprs <- hc_exprs[,species_no_DE]

species_no_DE_methyl <- exprs_methyl[,species_no_DE]

two_species <- samples$Tissue[species_no_DE]
no_DE_tissue <- droplevels.factor(two_species)

no_DE_RIN <- RIN[species_no_DE]

design <- model.matrix(~ as.factor(no_DE_tissue) + no_DE_RIN)
  fit_species_no_DE <- lmFit(cpm.voom.cyclic[,species_no_DE], design)
  fit_species_no_DE <- eBayes(fit_species_no_DE)

  HvC_Heart_fit_species_no_DE = topTable(fit_species_no_DE, coef=2, adjust="BH", number=Inf,
sort.by="none")

# Species DE 
  design_1 <- model.matrix(~ X + RIN_subset)
  fit_all <- lmFit(cpm.voom.cyclic[,human_chimp_heart], design_1)
  fit_all <- eBayes(fit_all)

# Get results
  HvC_Heart_fit_all = topTable(fit_all, coef=2, adjust="BH", number=Inf, sort.by="none")
  
  HvC_Heart_fit_species_no_DE = cbind(HvC_Heart_fit_species_no_DE, HvC_Heart_fit_all)
  
 
      HvC_Heart_fit_all_5perc <- HvC_Heart_fit_species_no_DE[which(HvC_Heart_fit_species_no_DE[,6] >  FDR_level & HvC_Heart_fit_species_no_DE[,13] < FDR_level), ]

  
   human_chimp_heart1 <-  rownames(methyl) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(methyl, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:7]
M <- counts_human_chimp_heart_subset

human_chimp_heart1 <-  rownames(hc_exprs_heart) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(hc_exprs_heart, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:7]
Y <- counts_human_chimp_heart_subset

check_values <- mediate.test.regressing(Y, X, M, RIN_subset)

fit <- vash(check_values$d_se,df=5, singlecomp = T, unimodal = "auto") 
fit_ash <- ash(as.vector(check_values$d), fit$sd.post, mode = 0, mixcompdist = "normal")

summary(fit_ash$result$svalue < FDR_level)
##    Mode   FALSE    TRUE 
## logical     294       8
calc <- as.array(summary(fit_ash$result$svalue < FDR_level))
calc2 <- as.numeric(calc[[2]])

(nrow(Y)-calc2)/nrow(Y)
## [1] 0.02649007
ind_sigb <- (fit_ash$result$svalue < 0.05)
ind_sigb[ind_sigb == TRUE] <- "red"
ind_sigb[ind_sigb == FALSE] <- "black"

combine_datab <- as.data.frame(cbind(fit_ash$result$betahat, fit_ash$result$sebetahat, ind_sigb), stringsAsFactors = FALSE)

combine_datab[,1] <- as.numeric(combine_datab[,1])
combine_datab[,2] <- as.numeric(combine_datab[,2])
colnames(combine_datab) <- c("Effect", "SE", "ind_sigb")

h_spec <- ggplot(combine_datab, aes(combine_datab$Effect, combine_datab$SE)) + geom_point(color = c(combine_datab$ind_sigb)) + ylab("Standard error") + xlab("Effect size difference") + ylim(0,5.5) + xlim(-8, 8)

Human heart-kidney non-DE

# Run the linear model in limma
  design_1 <- model.matrix(~ X + RIN_subset)
  fit_all <- lmFit(cpm.voom.cyclic[,human_chimp_heart], design_1)
  fit_all <- eBayes(fit_all)

# Get results
  HvC_Heart_fit_all = topTable(fit_all, coef=2, adjust="BH", number=Inf, sort.by="none")
  HvC_Heart_fit_all_5perc <-
HvC_Heart_fit_all[which(HvC_Heart_fit_all$adj.P.Val > FDR_level), ]
  
 human_chimp_heart1 <-  rownames(methyl) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(methyl, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:7]
M <- counts_human_chimp_heart_subset

human_chimp_heart1 <-  rownames(hc_exprs_heart) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(hc_exprs_heart, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:7]
Y <- counts_human_chimp_heart_subset

check_values <- mediate.test.regressing(Y, X, M, RIN_subset)

fit <- vash(check_values$d_se,df=5, singlecomp = T, unimodal = "auto") 
fit_ash <- ash(as.vector(check_values$d), fit$sd.post, mode = 0, mixcompdist = "normal")

calc <- as.array(summary(fit_ash$result$svalue < FDR_level))
calc2 <- as.numeric(calc[[2]])

(nrow(Y)-calc2)/nrow(Y)
## [1] 0
ind_sigc <- (fit_ash$result$svalue < 0.05)
ind_sigc[ind_sigc == TRUE] <- "red"
ind_sigc[ind_sigc == FALSE] <- "black"

combine_datac <- as.data.frame(cbind(fit_ash$result$betahat, fit_ash$result$sebetahat, ind_sigc), stringsAsFactors = FALSE)

combine_datac[,1] <- as.numeric(combine_datac[,1])
combine_datac[,2] <- as.numeric(combine_datac[,2])
colnames(combine_datac) <- c("Effect", "SE", "ind_sigc")

h_non <- ggplot(combine_datac, aes(combine_datac$Effect, combine_datac$SE)) + geom_point(color = c(combine_datac$ind_sigc)) + ylab("Standard error") + xlab("Effect size difference") + ylim(0,5.5) + xlim(-8, 8)

Human all comparisons

# Make a table



comparison1 = data.frame(c("H v K", "H v K", "H v Li", "H v Li",  "H v Lu", "H v Lu", "K v Li", "K v Li", "K v Lu", "K v Lu", "Li v Lu", "Li v Lu"))
genes1 =  data.frame(c("Both", "Human", "Both", "Human","Both", "Human", "Both", "Human", "Both", "Human","Both", "Human"))

percentage1 = data.frame(c(0.165, 0.026, 0.19, 0.0312, 0.1396, 0.0309, 0.1259, 0.000296, 0.185, 0.065, 0.1295, 0.0568))



df1 <- cbind(comparison1, genes1, percentage1)
colnames(df1) <- c("Tissue", "Gene_type", "Percentage")
#df$Tissue_comparison <- factor(df$Tissue, levels = df$Tissue )


plot_intertissue <- ggplot(data=df1, aes(x=Gene_type, y=Percentage)) +
  geom_bar(stat="identity", fill = as.numeric(df1$Gene_type)) + facet_wrap(~ df1$Tissue) + xlab("DE in Humans and Chimpanzees") + ylab("% of genes for which DNAm \n could underlie DE") + theme(strip.text.x = element_text(size = 10, face = "bold")) + ylim(0, 0.3)
# Check parameters

human_chimp_heart <- c(17, 20, 21, 24, 25, 28, 29)

# Methylation
hc_exprs <- exprs[,2:48]
exprs_methyl <- exprs[,49:79]

# HC heart only


hc_exprs_heart <- hc_exprs[,human_chimp_heart]


methyl <- exprs_methyl[,human_chimp_heart]
rownames(hc_exprs_heart) <- rownames(exprs)
rownames(methyl) <- rownames(exprs)

gene_value <- grep("ENSG00000072062", rownames(hc_exprs_heart))
gene_number <- gene_value

expression_levels <- hc_exprs_heart[gene_number,]
expression_tissue <- c("Kidney", "Heart", "Kidney", "Heart", "Kidney", "Heart", "Kidney")
expression_df <- as.data.frame(cbind(t(expression_levels), expression_tissue), stringsAsFactors = FALSE)
expression_df[,1] <- as.numeric(expression_df[,1])
colnames(expression_df) <- c("gene", "Tissue")
 
exp_plot <- ggplot(expression_df, aes(Tissue, gene)) + geom_boxplot() + ylab("Normalized gene expression level")

methyl_levels <- methyl[gene_number,]
methyl_tissue <- c("Kidney", "Heart", "Kidney", "Heart", "Kidney", "Heart", "Kidney")
methyl_df <- as.data.frame(cbind(t(methyl_levels), methyl_tissue), stringsAsFactors = FALSE)
methyl_df[,1] <- as.numeric(methyl_df[,1])
colnames(methyl_df) <- c("gene_name", "Tissue")

methyl_plot <- ggplot(methyl_df, aes(Tissue, gene_name)) + geom_boxplot() + ylab("Methylation level")

resid <- lm(t(expression_levels) ~ t(methyl_levels))$resid

resid_df <- as.data.frame(cbind(as.data.frame(resid), methyl_tissue), stringsAsFactors = FALSE)
resid_df[,1] <- as.numeric(resid_df[,1])
colnames(resid_df) <- c("gene_name", "Tissue")

resid_plot <- ggplot(resid_df, aes(Tissue, gene_name)) + geom_boxplot() + ylab("Normalized gene expression residual level")

plot_fig5df <- plot_grid(exp_plot, methyl_plot, resid_plot, nrow = 1)

Make into figure

#eight_plots <- plot_grid(h_cons, h_spec, h_non, hr_ded, plot_interspecies_hc, plot_interspecies_hr, labels = c("4A. Human-chimpanzee", "4B. Human-rhesus macaque", "4C.", "4D.", "4E.", "4F.", label_y = 0), ncol = 2)
#plot_fig5 <- plot_grid(eight_plots, ncol = 1, rel_heights=c(0.1, 1))

# first align the top-row plot (plot.iris) with the left-most plot of the
# bottom row (plot.mpg)

# then build the bottom row
top_row <- plot_grid(h_cons, h_spec, h_non, nrow = 1)
## Warning: Removed 4 rows containing missing values (geom_point).
bottom_row <- plot_grid(plot_intertissue, labels = c('5D. All tissues'))

plot_fig5 <- plot_grid(top_row, bottom_row, ncol = 1, nrow = 2, rel_heights = c(1,1.5))

#save_plot("/Users/laurenblake/Desktop/Tissue_Revisions/Figure_5_test.png", plot_fig5,
#           ncol = 1, # we're saving a grid plot of 2 columns
#           nrow = 2, # and 2 rows
           # each individual subplot should have an aspect ratio of 1.3
#           base_aspect_ratio = 1
#           )

# Divide into 2 plots

plot_fig5ac <- plot_grid(top_row, nrow = 1)

save_plot("/Users/laurenblake/Dropbox/Tissue_paper/Draft_11/Figures/Figure_5ac_test.pdf", plot_fig5ac,
           ncol = 3, # we're saving a grid plot of 2 columns
           nrow = 1, # and 2 rows
           # each individual subplot should have an aspect ratio of 1.3
           base_aspect_ratio = 1
           )

save_plot("/Users/laurenblake/Dropbox/Tissue_paper/Draft_11/Figures/Figure_5df_test.pdf", plot_fig5df,
           ncol = 3, # we're saving a grid plot of 2 columns
           nrow = 1, # and 2 rows
           # each individual subplot should have an aspect ratio of 1.3
           base_aspect_ratio = 1
           )


plot_fig5g <- plot_grid(bottom_row, ncol = 1, nrow = 1)

save_plot("/Users/laurenblake/Dropbox/Tissue_paper/Draft_11/Figures/Figure_5g_test.pdf", plot_fig5g,
           ncol = 1, # we're saving a grid plot of 2 columns
           nrow = 1, # and 2 rows
           # each individual subplot should have an aspect ratio of 1.3
           base_aspect_ratio = 1
           )

Human heart-kidney

# Check parameters

human_chimp_heart <- c(17, 20, 21, 24, 25, 28, 29)

# Methylation
hc_exprs <- exprs[,2:48]
exprs_methyl <- exprs[,49:79]

# HC heart only


hc_exprs_heart <- hc_exprs[,human_chimp_heart]


methyl <- exprs_methyl[,human_chimp_heart]
rownames(hc_exprs_heart) <- rownames(exprs)
rownames(methyl) <- rownames(exprs)


Y <- hc_exprs_heart
two_species <- samples$Tissue[human_chimp_heart]
X <- droplevels.factor(two_species)
M <- methyl
RIN_subset <- RIN[human_chimp_heart]



check_values <- mediate.test.regressing(Y, X, M, RIN_subset)
fit <- vash(check_values$d_se,df=5, singlecomp = T, unimodal = "auto") 
fit_ash_normal <- ash(as.vector(check_values$d), fit$sd.post, mode = 0, mixcompdist = "normal")
summary(fit_ash_normal$result$svalue < 0.05)
##    Mode   FALSE    TRUE 
## logical    7603     122
# Run the linear model in limma
  design_1 <- model.matrix(~ X + RIN_subset)
  fit_all <- lmFit(cpm.voom.cyclic[,human_chimp_heart], design_1)
  fit_all <- eBayes(fit_all)

# Get results
  HvC_Heart_fit_all = topTable(fit_all, coef=2, adjust="BH", number=Inf, sort.by="none")
  HvC_Heart_fit_all_5perc <-
HvC_Heart_fit_all[which(HvC_Heart_fit_all$adj.P.Val < FDR_level), ]
  
 human_chimp_heart1 <-  rownames(methyl) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(methyl, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:7]
M <- counts_human_chimp_heart_subset

human_chimp_heart1 <-  rownames(hc_exprs_heart) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(hc_exprs_heart, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:7]
Y <- counts_human_chimp_heart_subset

check_values <- mediate.test.regressing(Y, X, M, RIN_subset)

fit <- vash(check_values$d_se,df=5, singlecomp = T, unimodal = "auto") 
fit_ash <- ash(as.vector(check_values$d), fit$sd.post, mode = 0, mixcompdist = "normal")

calc <- as.array(summary(fit_ash$result$svalue < FDR_level))
calc2 <- as.numeric(calc[[2]])

(nrow(Y)-calc2)/nrow(Y)
## [1] 0.134471
# Species no DE
species_no_DE <- c(1, 2, 5, 6, 9, 10, 13, 14)

species_no_DE_exprs <- hc_exprs[,species_no_DE]

species_no_DE_methyl <- exprs_methyl[,species_no_DE]

two_species <- samples$Tissue[species_no_DE]
no_DE_tissue <- droplevels.factor(two_species)

no_DE_RIN <- RIN[species_no_DE]

design <- model.matrix(~ as.factor(no_DE_tissue) + no_DE_RIN)
  fit_species_no_DE <- lmFit(cpm.voom.cyclic[,species_no_DE], design)
  fit_species_no_DE <- eBayes(fit_species_no_DE)

  HvC_Heart_fit_species_no_DE = topTable(fit_species_no_DE, coef=2, adjust="BH", number=Inf,
sort.by="none")

# Species DE 
  design_1 <- model.matrix(~ X + RIN_subset)
  fit_all <- lmFit(cpm.voom.cyclic[,human_chimp_heart], design_1)
  fit_all <- eBayes(fit_all)

# Get results
  HvC_Heart_fit_all = topTable(fit_all, coef=2, adjust="BH", number=Inf, sort.by="none")
  
  HvC_Heart_fit_species_no_DE = cbind(HvC_Heart_fit_species_no_DE, HvC_Heart_fit_all)
  
 
      HvC_Heart_fit_all_5perc <- HvC_Heart_fit_species_no_DE[which(HvC_Heart_fit_species_no_DE[,6] <  FDR_level & HvC_Heart_fit_species_no_DE[,13] < FDR_level), ]

  
   human_chimp_heart1 <-  rownames(methyl) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(methyl, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:7]
M <- counts_human_chimp_heart_subset

human_chimp_heart1 <-  rownames(hc_exprs_heart) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(hc_exprs_heart, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:7]
Y <- counts_human_chimp_heart_subset

check_values <- mediate.test.regressing(Y, X, M, RIN_subset)

fit <- vash(check_values$d_se,df=5, singlecomp = T, unimodal = "auto") 
fit_ash <- ash(as.vector(check_values$d), fit$sd.post, mode = 0, mixcompdist = "normal")

ind_siga <- (fit_ash$result$svalue < 0.05)
ind_siga[ind_siga == TRUE] <- "red"
ind_siga[ind_siga == FALSE] <- "black"

combine_dataa <- as.data.frame(cbind(fit_ash$result$betahat, fit_ash$result$sebetahat, ind_siga), stringsAsFactors = FALSE)

combine_dataa[,1] <- as.numeric(combine_dataa[,1])
combine_dataa[,2] <- as.numeric(combine_dataa[,2])
colnames(combine_dataa) <- c("Effect", "SE", "ind_siga")

h_cons <- ggplot(combine_dataa, aes(combine_dataa$Effect, combine_dataa$SE)) + geom_point(color = c(combine_dataa$ind_siga)) + ylab("Standard error") + xlab(" ") + ylim(0,5.5) + xlim(-8, 8)

Human heart-kidney human specific DE

# Species no DE
species_no_DE <- c(1, 2, 5, 6, 9, 10, 13, 14)

species_no_DE_exprs <- hc_exprs[,species_no_DE]

species_no_DE_methyl <- exprs_methyl[,species_no_DE]

two_species <- samples$Tissue[species_no_DE]
no_DE_tissue <- droplevels.factor(two_species)

no_DE_RIN <- RIN[species_no_DE]

design <- model.matrix(~ as.factor(no_DE_tissue) + no_DE_RIN)
  fit_species_no_DE <- lmFit(cpm.voom.cyclic[,species_no_DE], design)
  fit_species_no_DE <- eBayes(fit_species_no_DE)

  HvC_Heart_fit_species_no_DE = topTable(fit_species_no_DE, coef=2, adjust="BH", number=Inf,
sort.by="none")

# Species DE 
  design_1 <- model.matrix(~ X + RIN_subset)
  fit_all <- lmFit(cpm.voom.cyclic[,human_chimp_heart], design_1)
  fit_all <- eBayes(fit_all)

# Get results
  HvC_Heart_fit_all = topTable(fit_all, coef=2, adjust="BH", number=Inf, sort.by="none")
  
  HvC_Heart_fit_species_no_DE = cbind(HvC_Heart_fit_species_no_DE, HvC_Heart_fit_all)
  
 
      HvC_Heart_fit_all_5perc <- HvC_Heart_fit_species_no_DE[which(HvC_Heart_fit_species_no_DE[,6] >  FDR_level & HvC_Heart_fit_species_no_DE[,13] < FDR_level), ]

  
   human_chimp_heart1 <-  rownames(methyl) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(methyl, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:7]
M <- counts_human_chimp_heart_subset

human_chimp_heart1 <-  rownames(hc_exprs_heart) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(hc_exprs_heart, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:7]
Y <- counts_human_chimp_heart_subset

check_values <- mediate.test.regressing(Y, X, M, RIN_subset)

fit <- vash(check_values$d_se,df=5, singlecomp = T, unimodal = "auto") 
fit_ash <- ash(as.vector(check_values$d), fit$sd.post, mode = 0, mixcompdist = "normal")

summary(fit_ash$result$svalue < FDR_level)
##    Mode   FALSE    TRUE 
## logical     294       8
calc <- as.array(summary(fit_ash$result$svalue < FDR_level))
calc2 <- as.numeric(calc[[2]])

(nrow(Y)-calc2)/nrow(Y)
## [1] 0.02649007
ind_sigb <- (fit_ash$result$svalue < 0.05)
ind_sigb[ind_sigb == TRUE] <- "red"
ind_sigb[ind_sigb == FALSE] <- "black"

combine_datab <- as.data.frame(cbind(fit_ash$result$betahat, fit_ash$result$sebetahat, ind_sigb), stringsAsFactors = FALSE)

combine_datab[,1] <- as.numeric(combine_datab[,1])
combine_datab[,2] <- as.numeric(combine_datab[,2])
colnames(combine_datab) <- c("Effect", "SE", "ind_sigb")

h_spec <- ggplot(combine_datab, aes(combine_datab$Effect, combine_datab$SE)) + geom_point(color = c(combine_datab$ind_sigb)) + ylab(" ") + xlab("Effect size difference (before-after regressing out methylation levels") + ylim(0,5.5) + xlim(-8, 8)

Human heart-kidney non-DE

# Run the linear model in limma
  design_1 <- model.matrix(~ X + RIN_subset)
  fit_all <- lmFit(cpm.voom.cyclic[,human_chimp_heart], design_1)
  fit_all <- eBayes(fit_all)

# Get results
  HvC_Heart_fit_all = topTable(fit_all, coef=2, adjust="BH", number=Inf, sort.by="none")
  HvC_Heart_fit_all_5perc <-
HvC_Heart_fit_all[which(HvC_Heart_fit_all$adj.P.Val > FDR_level), ]
  
 human_chimp_heart1 <-  rownames(methyl) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(methyl, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:7]
M <- counts_human_chimp_heart_subset

human_chimp_heart1 <-  rownames(hc_exprs_heart) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(hc_exprs_heart, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:7]
Y <- counts_human_chimp_heart_subset

check_values <- mediate.test.regressing(Y, X, M, RIN_subset)

fit <- vash(check_values$d_se,df=5, singlecomp = T, unimodal = "auto") 
fit_ash <- ash(as.vector(check_values$d), fit$sd.post, mode = 0, mixcompdist = "normal")

calc <- as.array(summary(fit_ash$result$svalue < FDR_level))
calc2 <- as.numeric(calc[[2]])

(nrow(Y)-calc2)/nrow(Y)
## [1] 0
ind_sigc <- (fit_ash$result$svalue < 0.05)
ind_sigc[ind_sigc == TRUE] <- "red"
ind_sigc[ind_sigc == FALSE] <- "black"

combine_datac <- as.data.frame(cbind(fit_ash$result$betahat, fit_ash$result$sebetahat, ind_sigc), stringsAsFactors = FALSE)

combine_datac[,1] <- as.numeric(combine_datac[,1])
combine_datac[,2] <- as.numeric(combine_datac[,2])
colnames(combine_datac) <- c("Effect", "SE", "ind_sigc")

h_non <- ggplot(combine_datac, aes(combine_datac$Effect, combine_datac$SE)) + geom_point(color = c(combine_datac$ind_sigc)) + ylab("") + xlab(" ") + ylim(0,5.5) + xlim(-8, 8)
top_row_poster <- plot_grid(h_cons, h_spec, h_non, 
                        labels = c('Conserved DE Genes', 'Non-conserved DE genes', 'non-DE genes'), nrow = 1, ncol = 3)
## Warning: Removed 4 rows containing missing values (geom_point).
#save_plot("/Users/laurenblake/Dropbox/primate_BS-seq_project/Presentations/Poster_5AC.pdf", top_row_poster,
#           ncol = 3, # we're saving a grid plot of 2 columns
#           nrow = 1, # and 2 rows
           # each individual subplot should have an aspect ratio of 1.3
#          base_aspect_ratio = 1
#           )



#save_plot("/Users/laurenblake/Dropbox/primate_BS-seq_project/Presentations/Poster_5D.pdf", plot_intertissue,
#           ncol = 1, # we're saving a grid plot of 2 columns
#           nrow = 1, # and 2 rows
            # each individual subplot should have an aspect ratio of 1.3
#          base_aspect_ratio = 1
#           )

For revisions

Human heart-kidney

# Check parameters

human_chimp_heart <- c(17, 20, 21, 24, 25, 28, 29)

# Methylation
hc_exprs <- exprs[,2:48]
exprs_methyl <- exprs[,49:79]

# HC heart only


hc_exprs_heart <- hc_exprs[,human_chimp_heart]


methyl <- exprs_methyl[,human_chimp_heart]
rownames(hc_exprs_heart) <- rownames(exprs)
rownames(methyl) <- rownames(exprs)


Y <- hc_exprs_heart
two_species <- samples$Tissue[human_chimp_heart]
X <- droplevels.factor(two_species)
M <- methyl
RIN_subset <- RIN[human_chimp_heart]



check_values <- mediate.test.regressing(Y, X, M, RIN_subset)
fit <- vash(check_values$d_se,df=5, singlecomp = T, unimodal = "auto") 
fit_ash_normal <- ash(as.vector(check_values$d), fit$sd.post, mode = 0, mixcompdist = "normal")
summary(fit_ash_normal$result$svalue < 0.05)
##    Mode   FALSE    TRUE 
## logical    7603     122
# Run the linear model in limma
  design_1 <- model.matrix(~ X + RIN_subset)
  fit_all <- lmFit(cpm.voom.cyclic[,human_chimp_heart], design_1)
  fit_all <- eBayes(fit_all)

# Get results
  HvC_Heart_fit_all = topTable(fit_all, coef=2, adjust="BH", number=Inf, sort.by="none")
  HvC_Heart_fit_all_5perc <-
HvC_Heart_fit_all[which(HvC_Heart_fit_all$adj.P.Val < FDR_level), ]
  
 human_chimp_heart1 <-  rownames(methyl) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(methyl, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:7]
M <- counts_human_chimp_heart_subset

human_chimp_heart1 <-  rownames(hc_exprs_heart) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(hc_exprs_heart, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:7]
Y <- counts_human_chimp_heart_subset

check_values <- mediate.test.regressing(Y, X, M, RIN_subset)

fit <- vash(check_values$d_se,df=5, singlecomp = T, unimodal = "auto") 
fit_ash <- ash(as.vector(check_values$d), fit$sd.post, mode = 0, mixcompdist = "normal")

calc <- as.array(summary(fit_ash$result$svalue < FDR_level))
calc2 <- as.numeric(calc[[2]])

(nrow(Y)-calc2)/nrow(Y)
## [1] 0.134471
# Species no DE
species_no_DE <- c(1, 2, 5, 6, 9, 10, 13, 14)

species_no_DE_exprs <- hc_exprs[,species_no_DE]

species_no_DE_methyl <- exprs_methyl[,species_no_DE]

two_species <- samples$Tissue[species_no_DE]
no_DE_tissue <- droplevels.factor(two_species)

no_DE_RIN <- RIN[species_no_DE]

design <- model.matrix(~ as.factor(no_DE_tissue) + no_DE_RIN)
  fit_species_no_DE <- lmFit(cpm.voom.cyclic[,species_no_DE], design)
  fit_species_no_DE <- eBayes(fit_species_no_DE)

  HvC_Heart_fit_species_no_DE = topTable(fit_species_no_DE, coef=2, adjust="BH", number=Inf,
sort.by="none")

# Species DE 
  design_1 <- model.matrix(~ X + RIN_subset)
  fit_all <- lmFit(cpm.voom.cyclic[,human_chimp_heart], design_1)
  fit_all <- eBayes(fit_all)

# Get results
  HvC_Heart_fit_all = topTable(fit_all, coef=2, adjust="BH", number=Inf, sort.by="none")
  
  HvC_Heart_fit_species_no_DE = cbind(HvC_Heart_fit_species_no_DE, HvC_Heart_fit_all)
  
 
      HvC_Heart_fit_all_5perc <- HvC_Heart_fit_species_no_DE[which(HvC_Heart_fit_species_no_DE[,6] <  FDR_level & HvC_Heart_fit_species_no_DE[,13] < FDR_level), ]

  
   human_chimp_heart1 <-  rownames(methyl) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(methyl, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:7]
M <- counts_human_chimp_heart_subset

human_chimp_heart1 <-  rownames(hc_exprs_heart) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(hc_exprs_heart, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:7]
Y <- counts_human_chimp_heart_subset

check_values <- mediate.test.regressing(Y, X, M, RIN_subset)

fit <- vash(check_values$d_se,df=5, singlecomp = T, unimodal = "auto") 
fit_ash <- ash(as.vector(check_values$d), fit$sd.post, mode = 0, mixcompdist = "normal")

ind_siga <- (fit_ash$result$svalue < 0.05)
ind_siga[ind_siga == TRUE] <- "red"
ind_siga[ind_siga == FALSE] <- "black"

combine_dataa <- as.data.frame(cbind(fit_ash$result$betahat, fit_ash$result$sebetahat, ind_siga), stringsAsFactors = FALSE)

combine_dataa[,1] <- as.numeric(combine_dataa[,1])
combine_dataa[,2] <- as.numeric(combine_dataa[,2])
colnames(combine_dataa) <- c("Effect", "SE", "ind_siga")

Signficant conserved heart-kidney in human

# Revisions- run GO
rownames(combine_dataa) <- rownames(Y)


check <- as.numeric(as.factor(combine_dataa$ind_siga))

# Merge ENSG with true/false

test_gene <- as.vector(check)
names(test_gene) <- rownames(Y)

test_gene_heart_kidney <- as.vector(check)
names(test_gene_heart_kidney) <- rownames(Y)

# Run topGO
go_data <- new("topGOdata",
                   ontology = "BP",
                   allGenes = test_gene, 
                    geneSel = function(allScore){
    return(allScore > 1)
},
                   nodeSize = 5,
                   annotationFun = annFUN.org,
                   mapping = "org.Hs.eg.db",
                   ID = "ensembl")
## 
## Building most specific GOs .....
##  ( 3826 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 7633 GO terms and 17464 relations. )
## 
## Annotating nodes ...............
##  ( 1050 genes annotated to the GO terms. )
library(clusterProfiler)


test_df <- as.data.frame(test_gene)
test_df <- data.frame(cbind(rownames(Y), test_df), stringsAsFactors = FALSE)
colnames(test_df) <- c("ensg", "test_gene")

formula_res <- compareCluster(ensg~test_gene, data=test_df, fun="enrichGO", universe = test_df$ensg,
                OrgDb         = org.Hs.eg.db,
                                keyType       = 'ENSEMBL',
                ont           = "BP",
                pAdjustMethod = "fdr",
                qvalueCutoff  = 0.05,
                                maxGSSize = 3000,
                                minGSSize = 3)

#Plot
make_dotplot <- dotplot(formula_res, showCategory = 20, by = "count")

plot_fig5h <- plot_grid(make_dotplot, ncol = 1, nrow = 1)

save_plot("/Users/laurenblake/Dropbox/Tissue_paper/Draft_11/Figures/Figure_5h_test.pdf", plot_fig5h,
           ncol = 2, # we're saving a grid plot of 2 columns
           nrow = 1, # and 2 rows
           # each individual subplot should have an aspect ratio of 1.3
           base_aspect_ratio = 1
           )
           
# Perform enrichment test
go_test <- runTest(go_data, algorithm = "weight01", statistic = "fisher")
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 2225 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 18:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 17:  2 nodes to be scored    (0 eliminated genes)
## 
##   Level 16:  3 nodes to be scored    (5 eliminated genes)
## 
##   Level 15:  6 nodes to be scored    (7 eliminated genes)
## 
##   Level 14:  13 nodes to be scored   (16 eliminated genes)
## 
##   Level 13:  34 nodes to be scored   (53 eliminated genes)
## 
##   Level 12:  56 nodes to be scored   (104 eliminated genes)
## 
##   Level 11:  87 nodes to be scored   (254 eliminated genes)
## 
##   Level 10:  164 nodes to be scored  (337 eliminated genes)
## 
##   Level 9:   249 nodes to be scored  (464 eliminated genes)
## 
##   Level 8:   295 nodes to be scored  (591 eliminated genes)
## 
##   Level 7:   368 nodes to be scored  (744 eliminated genes)
## 
##   Level 6:   381 nodes to be scored  (857 eliminated genes)
## 
##   Level 5:   291 nodes to be scored  (926 eliminated genes)
## 
##   Level 4:   174 nodes to be scored  (983 eliminated genes)
## 
##   Level 3:   80 nodes to be scored   (1011 eliminated genes)
## 
##   Level 2:   20 nodes to be scored   (1026 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (1037 eliminated genes)
go_table <- GenTable(go_data, weightFisher = go_test,
                         orderBy = "weightFisher", ranksOf = "weightFisher",
                         topNodes = sum(score(go_test) < .01))

go_table
##         GO.ID                                        Term Annotated
## 1  GO:0030049                     muscle filament sliding        12
## 2  GO:0060048                  cardiac muscle contraction        26
## 3  GO:0002026 regulation of the force of heart contrac...         7
## 4  GO:0008016             regulation of heart contraction        33
## 5  GO:0006937            regulation of muscle contraction        25
## 6  GO:0045214                      sarcomere organization        13
## 7  GO:0032386       regulation of intracellular transport        51
## 8  GO:0043502             regulation of muscle adaptation        12
## 9  GO:0055010 ventricular cardiac muscle tissue morpho...        12
## 10 GO:0086064 cell communication by electrical couplin...         6
## 11 GO:1901655                 cellular response to ketone         6
##    Significant Expected weightFisher
## 1           10     1.95      5.1e-07
## 2           12     4.23      4.2e-05
## 3            6     1.14      0.00010
## 4           15     5.37      0.00048
## 5           14     4.07      0.00145
## 6            7     2.12      0.00194
## 7           11     8.31      0.00406
## 8            4     1.95      0.00429
## 9            6     1.95      0.00672
## 10           4     0.98      0.00780
## 11           4     0.98      0.00780

An example

# Check parameters

human_chimp_heart <- c(17, 20, 21, 24, 25, 28, 29)

# Methylation
hc_exprs <- exprs[,2:48]
exprs_methyl <- exprs[,49:79]

# HC heart only

library(ggplot2)
library(cowplot)

hc_exprs_heart <- hc_exprs[,human_chimp_heart]


methyl <- exprs_methyl[,human_chimp_heart]
rownames(hc_exprs_heart) <- rownames(exprs)
rownames(methyl) <- rownames(exprs)

gene_value <- grep("ENSG00000072062", rownames(hc_exprs_heart))
gene_number <- gene_value

expression_levels <- hc_exprs_heart[gene_number,]
expression_tissue <- c("Kidney", "Heart", "Kidney", "Heart", "Kidney", "Heart", "Kidney")
expression_df <- as.data.frame(cbind(t(expression_levels), expression_tissue), stringsAsFactors = FALSE)
expression_df[,1] <- as.numeric(expression_df[,1])
colnames(expression_df) <- c("gene", "Tissue")
 
exp_plot <- ggplot(expression_df, aes(Tissue, gene)) + geom_boxplot() + ylab("Normalized gene \n expression level")

methyl_levels <- methyl[gene_number,]
methyl_tissue <- c("Kidney", "Heart", "Kidney", "Heart", "Kidney", "Heart", "Kidney")
methyl_df <- as.data.frame(cbind(t(methyl_levels), methyl_tissue), stringsAsFactors = FALSE)
methyl_df[,1] <- as.numeric(methyl_df[,1])
colnames(methyl_df) <- c("gene_name", "Tissue")

methyl_plot <- ggplot(methyl_df, aes(Tissue, gene_name)) + geom_boxplot() + ylab("Methylation level")

resid <- lm(t(expression_levels) ~ t(methyl_levels))$resid

resid_df <- as.data.frame(cbind(as.data.frame(resid), methyl_tissue), stringsAsFactors = FALSE)
resid_df[,1] <- as.numeric(resid_df[,1])
colnames(resid_df) <- c("gene_name", "Tissue")

resid_plot <- ggplot(resid_df, aes(Tissue, gene_name)) + geom_boxplot() + ylab("Normalized gene \n  expression \n residual level")

plot_grid(exp_plot, methyl_plot, resid_plot, nrow = 1)

Human heart-liver

# Check parameters

human_chimp_heart <- c(18, 20, 22, 24, 26, 28, 30)

# Methylation
hc_exprs <- exprs[,2:48]
exprs_methyl <- exprs[,49:79]

# HC heart only


hc_exprs_heart <- hc_exprs[,human_chimp_heart]


methyl <- exprs_methyl[,human_chimp_heart]
rownames(hc_exprs_heart) <- rownames(exprs)
rownames(methyl) <- rownames(exprs)


Y <- hc_exprs_heart
two_species <- samples$Tissue[human_chimp_heart]
X <- droplevels.factor(two_species)
M <- methyl
RIN_subset <- RIN[human_chimp_heart]



check_values <- mediate.test.regressing(Y, X, M, RIN_subset)
fit <- vash(check_values$d_se,df=5, singlecomp = T, unimodal = "auto") 
fit_ash_normal <- ash(as.vector(check_values$d), fit$sd.post, mode = 0, mixcompdist = "normal")
summary(fit_ash_normal$result$svalue < 0.05)
##    Mode   FALSE    TRUE 
## logical    7562     163
# Run the linear model in limma
  design_1 <- model.matrix(~ X + RIN_subset)
  fit_all <- lmFit(cpm.voom.cyclic[,human_chimp_heart], design_1)
  fit_all <- eBayes(fit_all)

# Get results
  HvC_Heart_fit_all = topTable(fit_all, coef=2, adjust="BH", number=Inf, sort.by="none")
  HvC_Heart_fit_all_5perc <-
HvC_Heart_fit_all[which(HvC_Heart_fit_all$adj.P.Val < FDR_level), ]
  
 human_chimp_heart1 <-  rownames(methyl) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(methyl, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:7]
M <- counts_human_chimp_heart_subset

human_chimp_heart1 <-  rownames(hc_exprs_heart) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(hc_exprs_heart, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:7]
Y <- counts_human_chimp_heart_subset

check_values <- mediate.test.regressing(Y, X, M, RIN_subset)

fit <- vash(check_values$d_se,df=5, singlecomp = T, unimodal = "auto") 
fit_ash <- ash(as.vector(check_values$d), fit$sd.post, mode = 0, mixcompdist = "normal")

calc <- as.array(summary(fit_ash$result$svalue < FDR_level))
calc2 <- as.numeric(calc[[2]])

(nrow(Y)-calc2)/nrow(Y)
## [1] 0.1703801
# Species no DE
species_no_DE <- c(1, 3, 5, 7, 9, 11, 13, 15)

species_no_DE_exprs <- hc_exprs[,species_no_DE]

species_no_DE_methyl <- exprs_methyl[,species_no_DE]

two_species <- samples$Tissue[species_no_DE]
no_DE_tissue <- droplevels.factor(two_species)

no_DE_RIN <- RIN[species_no_DE]

design <- model.matrix(~ as.factor(no_DE_tissue) + no_DE_RIN)
  fit_species_no_DE <- lmFit(cpm.voom.cyclic[,species_no_DE], design)
  fit_species_no_DE <- eBayes(fit_species_no_DE)

  HvC_Heart_fit_species_no_DE = topTable(fit_species_no_DE, coef=2, adjust="BH", number=Inf,
sort.by="none")

# Species DE 
  design_1 <- model.matrix(~ X + RIN_subset)
  fit_all <- lmFit(cpm.voom.cyclic[,human_chimp_heart], design_1)
  fit_all <- eBayes(fit_all)

# Get results
  HvC_Heart_fit_all = topTable(fit_all, coef=2, adjust="BH", number=Inf, sort.by="none")
  
  HvC_Heart_fit_species_no_DE = cbind(HvC_Heart_fit_species_no_DE, HvC_Heart_fit_all)
  
 
      HvC_Heart_fit_all_5perc <- HvC_Heart_fit_species_no_DE[which(HvC_Heart_fit_species_no_DE[,6] <  FDR_level & HvC_Heart_fit_species_no_DE[,13] < FDR_level), ]

  
   human_chimp_heart1 <-  rownames(methyl) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(methyl, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:7]
M <- counts_human_chimp_heart_subset

human_chimp_heart1 <-  rownames(hc_exprs_heart) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(hc_exprs_heart, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:7]
Y <- counts_human_chimp_heart_subset

check_values <- mediate.test.regressing(Y, X, M, RIN_subset)

fit <- vash(check_values$d_se,df=5, singlecomp = T, unimodal = "auto") 
fit_ash <- ash(as.vector(check_values$d), fit$sd.post, mode = 0, mixcompdist = "normal")

ind_siga <- (fit_ash$result$svalue < 0.05)
ind_siga[ind_siga == TRUE] <- "red"
ind_siga[ind_siga == FALSE] <- "black"

combine_dataa <- as.data.frame(cbind(fit_ash$result$betahat, fit_ash$result$sebetahat, ind_siga), stringsAsFactors = FALSE)

combine_dataa[,1] <- as.numeric(combine_dataa[,1])
combine_dataa[,2] <- as.numeric(combine_dataa[,2])
colnames(combine_dataa) <- c("Effect", "SE", "ind_siga")

Signficant conserved heart-liver in human

# Revisions- run GO
rownames(combine_dataa) <- rownames(Y)


check <- as.numeric(as.factor(combine_dataa$ind_siga))

# Merge ENSG with true/false

test_gene <- as.vector(check)
names(test_gene) <- rownames(Y)

# Run topGO
go_data <- new("topGOdata",
                   ontology = "BP",
                   allGenes = test_gene, 
                    geneSel = function(allScore){
    return(allScore > 1)
},
                   nodeSize = 5,
                   annotationFun = annFUN.org,
                   mapping = "org.Hs.eg.db",
                   ID = "ensembl")
## 
## Building most specific GOs .....
##  ( 4342 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 8265 GO terms and 18972 relations. )
## 
## Annotating nodes ...............
##  ( 1225 genes annotated to the GO terms. )
# Perform enrichment test
go_test <- runTest(go_data, algorithm = "weight01", statistic = "fisher")
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 2512 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 17:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 16:  2 nodes to be scored    (0 eliminated genes)
## 
##   Level 15:  8 nodes to be scored    (5 eliminated genes)
## 
##   Level 14:  18 nodes to be scored   (17 eliminated genes)
## 
##   Level 13:  42 nodes to be scored   (53 eliminated genes)
## 
##   Level 12:  63 nodes to be scored   (123 eliminated genes)
## 
##   Level 11:  110 nodes to be scored  (308 eliminated genes)
## 
##   Level 10:  190 nodes to be scored  (413 eliminated genes)
## 
##   Level 9:   298 nodes to be scored  (590 eliminated genes)
## 
##   Level 8:   352 nodes to be scored  (748 eliminated genes)
## 
##   Level 7:   420 nodes to be scored  (906 eliminated genes)
## 
##   Level 6:   434 nodes to be scored  (1038 eliminated genes)
## 
##   Level 5:   301 nodes to be scored  (1116 eliminated genes)
## 
##   Level 4:   172 nodes to be scored  (1173 eliminated genes)
## 
##   Level 3:   81 nodes to be scored   (1187 eliminated genes)
## 
##   Level 2:   19 nodes to be scored   (1197 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (1207 eliminated genes)
go_table <- GenTable(go_data, weightFisher = go_test,
                         orderBy = "weightFisher", ranksOf = "weightFisher",
                         topNodes = sum(score(go_test) < .01))

go_table
##         GO.ID                                        Term Annotated
## 1  GO:0030049                     muscle filament sliding        11
## 2  GO:0060048                  cardiac muscle contraction        20
## 3  GO:0010951 negative regulation of endopeptidase act...        28
## 4  GO:0006953                        acute-phase response        12
## 5  GO:0002576                      platelet degranulation        32
## 6  GO:0045214                      sarcomere organization        13
## 7  GO:1901381 positive regulation of potassium ion tra...         5
## 8  GO:0030194    positive regulation of blood coagulation         8
## 9  GO:0140115               export across plasma membrane         6
## 10 GO:0032722 positive regulation of chemokine product...         6
## 11 GO:0019835                                   cytolysis         6
## 12 GO:0002026 regulation of the force of heart contrac...         6
## 13 GO:0050830 defense response to Gram-positive bacter...         6
## 14 GO:0055010 ventricular cardiac muscle tissue morpho...        11
## 15 GO:0031639                      plasminogen activation        11
## 16 GO:0007597        blood coagulation, intrinsic pathway         9
## 17 GO:0032103 positive regulation of response to exter...        26
## 18 GO:1904063 negative regulation of cation transmembr...         7
## 19 GO:2001259 positive regulation of cation channel ac...         7
## 20 GO:0051917                  regulation of fibrinolysis         7
## 21 GO:0006641              triglyceride metabolic process        23
## 22 GO:0008016             regulation of heart contraction        31
## 23 GO:0006937            regulation of muscle contraction        22
## 24 GO:0042168                      heme metabolic process         5
## 25 GO:0071688 striated muscle myosin thick filament as...         5
## 26 GO:0099622 cardiac muscle cell membrane repolarizat...         5
## 27 GO:1901021 positive regulation of calcium ion trans...         5
## 28 GO:0002719 negative regulation of cytokine producti...         5
## 29 GO:0050829 defense response to Gram-negative bacter...         5
## 30 GO:0072378    blood coagulation, fibrin clot formation        12
## 31 GO:0042730                                fibrinolysis        10
## 32 GO:0006942 regulation of striated muscle contractio...         9
## 33 GO:0090277 positive regulation of peptide hormone s...        13
## 34 GO:0050728 negative regulation of inflammatory resp...        17
## 35 GO:1903115 regulation of actin filament-based movem...         8
## 36 GO:0015909             long-chain fatty acid transport         8
## 37 GO:0032755 positive regulation of interleukin-6 pro...         8
## 38 GO:0050873              brown fat cell differentiation         8
## 39 GO:0050729 positive regulation of inflammatory resp...        11
##    Significant Expected weightFisher
## 1           10     2.11      5.3e-07
## 2           13     3.84      1.1e-05
## 3           14     5.37      1.7e-05
## 4            9     2.30      3.9e-05
## 5           16     6.14      6.7e-05
## 6            9     2.49      0.00011
## 7            5     0.96      0.00025
## 8            6     1.53      0.00093
## 9            5     1.15      0.00127
## 10           5     1.15      0.00127
## 11           5     1.15      0.00127
## 12           5     1.15      0.00127
## 13           5     1.15      0.00127
## 14           7     2.11      0.00144
## 15           7     2.11      0.00144
## 16           6     1.73      0.00235
## 17          12     4.99      0.00350
## 18           5     1.34      0.00375
## 19           5     1.34      0.00375
## 20           5     1.34      0.00375
## 21          11     4.41      0.00466
## 22          17     5.95      0.00491
## 23          11     4.22      0.00529
## 24           4     0.96      0.00563
## 25           4     0.96      0.00563
## 26           4     0.96      0.00563
## 27           4     0.96      0.00563
## 28           4     0.96      0.00563
## 29           4     0.96      0.00563
## 30           9     2.30      0.00661
## 31           8     1.92      0.00666
## 32           6     1.73      0.00682
## 33           4     2.49      0.00707
## 34           8     3.26      0.00803
## 35           5     1.53      0.00844
## 36           5     1.53      0.00844
## 37           5     1.53      0.00844
## 38           5     1.53      0.00844
## 39           6     2.11      0.00915

Human heart-lung

# Check parameters

human_chimp_heart <- c(19, 20, 23, 24, 27, 28, 31)

# Methylation
hc_exprs <- exprs[,2:48]
exprs_methyl <- exprs[,49:79]

# HC heart only


hc_exprs_heart <- hc_exprs[,human_chimp_heart]


methyl <- exprs_methyl[,human_chimp_heart]
rownames(hc_exprs_heart) <- rownames(exprs)
rownames(methyl) <- rownames(exprs)


Y <- hc_exprs_heart
two_species <- samples$Tissue[human_chimp_heart]
X <- droplevels.factor(two_species)
M <- methyl
RIN_subset <- RIN[human_chimp_heart]



check_values <- mediate.test.regressing(Y, X, M, RIN_subset)
fit <- vash(check_values$d_se,df=5, singlecomp = T, unimodal = "auto") 
fit_ash_normal <- ash(as.vector(check_values$d), fit$sd.post, mode = 0, mixcompdist = "normal")
summary(fit_ash_normal$result$svalue < 0.05)
##    Mode   FALSE    TRUE 
## logical    7637      88
# Run the linear model in limma
  design_1 <- model.matrix(~ X + RIN_subset)
  fit_all <- lmFit(cpm.voom.cyclic[,human_chimp_heart], design_1)
  fit_all <- eBayes(fit_all)

# Get results
  HvC_Heart_fit_all = topTable(fit_all, coef=2, adjust="BH", number=Inf, sort.by="none")
  HvC_Heart_fit_all_5perc <-
HvC_Heart_fit_all[which(HvC_Heart_fit_all$adj.P.Val < FDR_level), ]
  
 human_chimp_heart1 <-  rownames(methyl) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(methyl, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:7]
M <- counts_human_chimp_heart_subset

human_chimp_heart1 <-  rownames(hc_exprs_heart) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(hc_exprs_heart, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:7]
Y <- counts_human_chimp_heart_subset

check_values <- mediate.test.regressing(Y, X, M, RIN_subset)

fit <- vash(check_values$d_se,df=5, singlecomp = T, unimodal = "auto") 
fit_ash <- ash(as.vector(check_values$d), fit$sd.post, mode = 0, mixcompdist = "normal")

calc <- as.array(summary(fit_ash$result$svalue < FDR_level))
calc2 <- as.numeric(calc[[2]])

(nrow(Y)-calc2)/nrow(Y)
## [1] 0.1143466
# Species no DE
species_no_DE <- c(1, 4, 5, 8, 9, 12, 13, 16)

species_no_DE_exprs <- hc_exprs[,species_no_DE]

species_no_DE_methyl <- exprs_methyl[,species_no_DE]

two_species <- samples$Tissue[species_no_DE]
no_DE_tissue <- droplevels.factor(two_species)

no_DE_RIN <- RIN[species_no_DE]

design <- model.matrix(~ as.factor(no_DE_tissue) + no_DE_RIN)
  fit_species_no_DE <- lmFit(cpm.voom.cyclic[,species_no_DE], design)
  fit_species_no_DE <- eBayes(fit_species_no_DE)

  HvC_Heart_fit_species_no_DE = topTable(fit_species_no_DE, coef=2, adjust="BH", number=Inf,
sort.by="none")

# Species DE 
  design_1 <- model.matrix(~ X + RIN_subset)
  fit_all <- lmFit(cpm.voom.cyclic[,human_chimp_heart], design_1)
  fit_all <- eBayes(fit_all)

# Get results
  HvC_Heart_fit_all = topTable(fit_all, coef=2, adjust="BH", number=Inf, sort.by="none")
  
  HvC_Heart_fit_species_no_DE = cbind(HvC_Heart_fit_species_no_DE, HvC_Heart_fit_all)
  
 
      HvC_Heart_fit_all_5perc <- HvC_Heart_fit_species_no_DE[which(HvC_Heart_fit_species_no_DE[,6] <  FDR_level & HvC_Heart_fit_species_no_DE[,13] < FDR_level), ]

  
   human_chimp_heart1 <-  rownames(methyl) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(methyl, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:7]
M <- counts_human_chimp_heart_subset

human_chimp_heart1 <-  rownames(hc_exprs_heart) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(hc_exprs_heart, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:7]
Y <- counts_human_chimp_heart_subset

check_values <- mediate.test.regressing(Y, X, M, RIN_subset)

fit <- vash(check_values$d_se,df=5, singlecomp = T, unimodal = "auto") 
fit_ash <- ash(as.vector(check_values$d), fit$sd.post, mode = 0, mixcompdist = "normal")

ind_siga <- (fit_ash$result$svalue < 0.05)
ind_siga[ind_siga == TRUE] <- "red"
ind_siga[ind_siga == FALSE] <- "black"

combine_dataa <- as.data.frame(cbind(fit_ash$result$betahat, fit_ash$result$sebetahat, ind_siga), stringsAsFactors = FALSE)

combine_dataa[,1] <- as.numeric(combine_dataa[,1])
combine_dataa[,2] <- as.numeric(combine_dataa[,2])
colnames(combine_dataa) <- c("Effect", "SE", "ind_siga")

Signficant conserved heart-lung in human

# Revisions- run GO
rownames(combine_dataa) <- rownames(Y)


check <- as.numeric(as.factor(combine_dataa$ind_siga))

# Merge ENSG with true/false

test_gene <- as.vector(check)
names(test_gene) <- rownames(Y)

# Run topGO
go_data <- new("topGOdata",
                   ontology = "BP",
                   allGenes = test_gene, 
                    geneSel = function(allScore){
    return(allScore > 1)
},
                   nodeSize = 5,
                   annotationFun = annFUN.org,
                   mapping = "org.Hs.eg.db",
                   ID = "ensembl")
## 
## Building most specific GOs .....
##  ( 3693 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 7318 GO terms and 16609 relations. )
## 
## Annotating nodes ...............
##  ( 1028 genes annotated to the GO terms. )
# Perform enrichment test
go_test <- runTest(go_data, algorithm = "weight01", statistic = "fisher")
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 2136 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 19:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 18:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 17:  1 nodes to be scored    (5 eliminated genes)
## 
##   Level 16:  4 nodes to be scored    (6 eliminated genes)
## 
##   Level 15:  7 nodes to be scored    (6 eliminated genes)
## 
##   Level 14:  17 nodes to be scored   (23 eliminated genes)
## 
##   Level 13:  38 nodes to be scored   (70 eliminated genes)
## 
##   Level 12:  58 nodes to be scored   (137 eliminated genes)
## 
##   Level 11:  98 nodes to be scored   (267 eliminated genes)
## 
##   Level 10:  166 nodes to be scored  (353 eliminated genes)
## 
##   Level 9:   234 nodes to be scored  (480 eliminated genes)
## 
##   Level 8:   279 nodes to be scored  (635 eliminated genes)
## 
##   Level 7:   344 nodes to be scored  (761 eliminated genes)
## 
##   Level 6:   370 nodes to be scored  (873 eliminated genes)
## 
##   Level 5:   271 nodes to be scored  (928 eliminated genes)
## 
##   Level 4:   155 nodes to be scored  (968 eliminated genes)
## 
##   Level 3:   72 nodes to be scored   (989 eliminated genes)
## 
##   Level 2:   19 nodes to be scored   (1000 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (1007 eliminated genes)
go_table <- GenTable(go_data, weightFisher = go_test,
                         orderBy = "weightFisher", ranksOf = "weightFisher",
                         topNodes = sum(score(go_test) < .01))

go_table
##        GO.ID                                        Term Annotated
## 1 GO:0051056 regulation of small GTPase mediated sign...        29
## 2 GO:0001658 branching involved in ureteric bud morph...         6
## 3 GO:0006939                   smooth muscle contraction         6
## 4 GO:0001934 positive regulation of protein phosphory...        67
##   Significant Expected weightFisher
## 1           6     3.95       0.0037
## 2           4     0.82       0.0040
## 3           4     0.82       0.0040
## 4          11     9.12       0.0041

Human kidney-liver

# Check parameters

human_chimp_heart <- c(17, 18, 21, 22, 25, 26, 29, 30)

# Methylation
hc_exprs <- exprs[,2:48]
exprs_methyl <- exprs[,49:79]

# HC heart only


hc_exprs_heart <- hc_exprs[,human_chimp_heart]


methyl <- exprs_methyl[,human_chimp_heart]
rownames(hc_exprs_heart) <- rownames(exprs)
rownames(methyl) <- rownames(exprs)


Y <- hc_exprs_heart
two_species <- samples$Tissue[human_chimp_heart]
X <- droplevels.factor(two_species)
M <- methyl
RIN_subset <- RIN[human_chimp_heart]



check_values <- mediate.test.regressing(Y, X, M, RIN_subset)
fit <- vash(check_values$d_se,df=6, singlecomp = T, unimodal = "auto") 
fit_ash_normal <- ash(as.vector(check_values$d), fit$sd.post, mode = 0, mixcompdist = "normal")
summary(fit_ash_normal$result$svalue < 0.05)
##    Mode   FALSE    TRUE 
## logical    7614     111
# Run the linear model in limma
  design_1 <- model.matrix(~ X + RIN_subset)
  fit_all <- lmFit(cpm.voom.cyclic[,human_chimp_heart], design_1)
  fit_all <- eBayes(fit_all)

# Get results
  HvC_Heart_fit_all = topTable(fit_all, coef=2, adjust="BH", number=Inf, sort.by="none")
  HvC_Heart_fit_all_5perc <-
HvC_Heart_fit_all[which(HvC_Heart_fit_all$adj.P.Val < FDR_level), ]
  
 human_chimp_heart1 <-  rownames(methyl) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(methyl, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:8]
M <- counts_human_chimp_heart_subset

human_chimp_heart1 <-  rownames(hc_exprs_heart) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(hc_exprs_heart, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:8]
Y <- counts_human_chimp_heart_subset

check_values <- mediate.test.regressing(Y, X, M, RIN_subset)

fit <- vash(check_values$d_se,df=6, singlecomp = T, unimodal = "auto") 
fit_ash <- ash(as.vector(check_values$d), fit$sd.post, mode = 0, mixcompdist = "normal")

calc <- as.array(summary(fit_ash$result$svalue < FDR_level))
calc2 <- as.numeric(calc[[2]])

(nrow(Y)-calc2)/nrow(Y)
## [1] 0.09890777
# Species no DE
species_no_DE <- c(2, 3, 6, 7, 10, 11, 14, 15)

species_no_DE_exprs <- hc_exprs[,species_no_DE]

species_no_DE_methyl <- exprs_methyl[,species_no_DE]

two_species <- samples$Tissue[species_no_DE]
no_DE_tissue <- droplevels.factor(two_species)

no_DE_RIN <- RIN[species_no_DE]

design <- model.matrix(~ as.factor(no_DE_tissue) + no_DE_RIN)
  fit_species_no_DE <- lmFit(cpm.voom.cyclic[,species_no_DE], design)
  fit_species_no_DE <- eBayes(fit_species_no_DE)

  HvC_Heart_fit_species_no_DE = topTable(fit_species_no_DE, coef=2, adjust="BH", number=Inf,
sort.by="none")

# Species DE 
  design_1 <- model.matrix(~ X + RIN_subset)
  fit_all <- lmFit(cpm.voom.cyclic[,human_chimp_heart], design_1)
  fit_all <- eBayes(fit_all)

# Get results
  HvC_Heart_fit_all = topTable(fit_all, coef=2, adjust="BH", number=Inf, sort.by="none")
  
  HvC_Heart_fit_species_no_DE = cbind(HvC_Heart_fit_species_no_DE, HvC_Heart_fit_all)
  
 
      HvC_Heart_fit_all_5perc <- HvC_Heart_fit_species_no_DE[which(HvC_Heart_fit_species_no_DE[,6] <  FDR_level & HvC_Heart_fit_species_no_DE[,13] < FDR_level), ]

  
   human_chimp_heart1 <-  rownames(methyl) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(methyl, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:8]
M <- counts_human_chimp_heart_subset

human_chimp_heart1 <-  rownames(hc_exprs_heart) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(hc_exprs_heart, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:8]
Y <- counts_human_chimp_heart_subset

check_values <- mediate.test.regressing(Y, X, M, RIN_subset)

fit <- vash(check_values$d_se,df=6, singlecomp = T, unimodal = "auto") 
fit_ash <- ash(as.vector(check_values$d), fit$sd.post, mode = 0, mixcompdist = "normal")

ind_siga <- (fit_ash$result$svalue < 0.05)
ind_siga[ind_siga == TRUE] <- "red"
ind_siga[ind_siga == FALSE] <- "black"

combine_dataa <- as.data.frame(cbind(fit_ash$result$betahat, fit_ash$result$sebetahat, ind_siga), stringsAsFactors = FALSE)

combine_dataa[,1] <- as.numeric(combine_dataa[,1])
combine_dataa[,2] <- as.numeric(combine_dataa[,2])
colnames(combine_dataa) <- c("Effect", "SE", "ind_siga")

Signficant conserved kidney-liver in human

# Revisions- run GO
rownames(combine_dataa) <- rownames(Y)


check <- as.numeric(as.factor(combine_dataa$ind_siga))

# Merge ENSG with true/false

test_gene <- as.vector(check)
names(test_gene) <- rownames(Y)

test_gene_kidney_liver <- as.vector(check)
names(test_gene_kidney_liver) <- rownames(Y)


# Run topGO
go_data <- new("topGOdata",
                   ontology = "BP",
                   allGenes = test_gene, 
                    geneSel = function(allScore){
    return(allScore > 1)
},
                   nodeSize = 5,
                   annotationFun = annFUN.org,
                   mapping = "org.Hs.eg.db",
                   ID = "ensembl")
## 
## Building most specific GOs .....
##  ( 4430 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 8345 GO terms and 19236 relations. )
## 
## Annotating nodes ...............
##  ( 1191 genes annotated to the GO terms. )
# Perform enrichment test
go_test <- runTest(go_data, algorithm = "weight01", statistic = "fisher")
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 2435 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 17:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 16:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 15:  4 nodes to be scored    (6 eliminated genes)
## 
##   Level 14:  10 nodes to be scored   (9 eliminated genes)
## 
##   Level 13:  33 nodes to be scored   (27 eliminated genes)
## 
##   Level 12:  63 nodes to be scored   (75 eliminated genes)
## 
##   Level 11:  105 nodes to be scored  (264 eliminated genes)
## 
##   Level 10:  187 nodes to be scored  (383 eliminated genes)
## 
##   Level 9:   290 nodes to be scored  (549 eliminated genes)
## 
##   Level 8:   346 nodes to be scored  (728 eliminated genes)
## 
##   Level 7:   405 nodes to be scored  (907 eliminated genes)
## 
##   Level 6:   399 nodes to be scored  (1032 eliminated genes)
## 
##   Level 5:   307 nodes to be scored  (1093 eliminated genes)
## 
##   Level 4:   178 nodes to be scored  (1142 eliminated genes)
## 
##   Level 3:   85 nodes to be scored   (1162 eliminated genes)
## 
##   Level 2:   20 nodes to be scored   (1171 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (1180 eliminated genes)
go_table <- GenTable(go_data, weightFisher = go_test,
                         orderBy = "weightFisher", ranksOf = "weightFisher",
                         topNodes = sum(score(go_test) < .01))

go_table
##         GO.ID                                        Term Annotated
## 1  GO:0006898               receptor-mediated endocytosis        47
## 2  GO:0010951 negative regulation of endopeptidase act...        35
## 3  GO:0030449         regulation of complement activation        14
## 4  GO:0006953                        acute-phase response        14
## 5  GO:0006958    complement activation, classical pathway        11
## 6  GO:0032374         regulation of cholesterol transport        12
## 7  GO:0010873 positive regulation of cholesterol ester...         5
## 8  GO:0033700                         phospholipid efflux         5
## 9  GO:0034380 high-density lipoprotein particle assemb...         5
## 10 GO:0034371                      chylomicron remodeling         5
## 11 GO:0010896 regulation of triglyceride catabolic pro...         5
## 12 GO:0033344                          cholesterol efflux        13
## 13 GO:0022409 positive regulation of cell-cell adhesio...        29
## 14 GO:0007597        blood coagulation, intrinsic pathway         8
## 15 GO:0034372 very-low-density lipoprotein particle re...         8
## 16 GO:0060192      negative regulation of lipase activity         6
## 17 GO:0034384 high-density lipoprotein particle cleara...         6
## 18 GO:0034378                        chylomicron assembly         6
## 19 GO:0043691               reverse cholesterol transport        10
## 20 GO:0050777      negative regulation of immune response        18
## 21 GO:0050996 positive regulation of lipid catabolic p...         7
## 22 GO:0034375 high-density lipoprotein particle remode...         7
## 23 GO:0051047            positive regulation of secretion        41
## 24 GO:0090277 positive regulation of peptide hormone s...        13
## 25 GO:0002576                      platelet degranulation        34
## 26 GO:0030212                hyaluronan metabolic process        11
##    Significant Expected weightFisher
## 1           15     6.08      3.0e-06
## 2           13     4.53      2.8e-05
## 3            7     1.81      0.00082
## 4            7     1.81      0.00082
## 5            6     1.42      0.00113
## 6            5     1.55      0.00121
## 7            4     0.65      0.00121
## 8            4     0.65      0.00121
## 9            4     0.65      0.00121
## 10           4     0.65      0.00121
## 11           4     0.65      0.00121
## 12           6     1.68      0.00136
## 13           8     3.75      0.00136
## 14           5     1.03      0.00137
## 15           5     1.03      0.00137
## 16           4     0.78      0.00328
## 17           4     0.78      0.00328
## 18           4     0.78      0.00328
## 19           5     1.29      0.00496
## 20           8     2.33      0.00645
## 21           4     0.91      0.00688
## 22           4     0.91      0.00688
## 23          13     5.30      0.00693
## 24           6     1.68      0.00742
## 25          10     4.40      0.00799
## 26           5     1.42      0.00816

Cluster profiler

#test_df_heart_kidney <- as.data.frame(test_gene_heart_kidney)
#test_df_heart_kidney[,2] <- rownames(test_df_heart_kidney)
#colnames(test_df_heart_kidney) <- c("hk", "ensg")

#test_df_kidney_liver <- as.data.frame(test_gene_kidney_liver)
#test_df_kidney_liver[,2] <- rownames(test_df_kidney_liver)
#colnames(test_df_kidney_liver) <- c("hk", "ensg")

#check_overlap <- merge(test_df_heart_kidney, test_df_kidney_liver, by = "ensg")

#formula_res <- compareCluster(ensg~hk.x+hk.y, data=check_overlap, fun="enrichGO", universe = check_overlap$ensg,
#                OrgDb         = org.Hs.eg.db,
#                                keyType       = 'ENSEMBL',
#                ont           = "BP",
#                pAdjustMethod = "fdr",
#                qvalueCutoff  = 0.05,
#                                maxGSSize = 3000,
#                                minGSSize = 3)

#Plot
#p10 <- dotplot(formula_res)
#dotplot(formula_res, showCategory = 10, by = "geneRatio")

#plot_grid(p10)

Human kidney-lung

# Check parameters

human_chimp_heart <- c(17, 19, 21, 23, 25, 27, 29, 31)

# Methylation
hc_exprs <- exprs[,2:48]
exprs_methyl <- exprs[,49:79]

# HC heart only


hc_exprs_heart <- hc_exprs[,human_chimp_heart]


methyl <- exprs_methyl[,human_chimp_heart]
rownames(hc_exprs_heart) <- rownames(exprs)
rownames(methyl) <- rownames(exprs)


Y <- hc_exprs_heart
two_species <- samples$Tissue[human_chimp_heart]
X <- droplevels.factor(two_species)
M <- methyl
RIN_subset <- RIN[human_chimp_heart]



check_values <- mediate.test.regressing(Y, X, M, RIN_subset)
fit <- vash(check_values$d_se,df=6, singlecomp = T, unimodal = "auto") 
fit_ash_normal <- ash(as.vector(check_values$d), fit$sd.post, mode = 0, mixcompdist = "normal")
summary(fit_ash_normal$result$svalue < 0.05)
##    Mode   FALSE    TRUE 
## logical    7665      60
# Run the linear model in limma
  design_1 <- model.matrix(~ X + RIN_subset)
  fit_all <- lmFit(cpm.voom.cyclic[,human_chimp_heart], design_1)
  fit_all <- eBayes(fit_all)

# Get results
  HvC_Heart_fit_all = topTable(fit_all, coef=2, adjust="BH", number=Inf, sort.by="none")
  HvC_Heart_fit_all_5perc <-
HvC_Heart_fit_all[which(HvC_Heart_fit_all$adj.P.Val < FDR_level), ]
  
 human_chimp_heart1 <-  rownames(methyl) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(methyl, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:8]
M <- counts_human_chimp_heart_subset

human_chimp_heart1 <-  rownames(hc_exprs_heart) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(hc_exprs_heart, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:8]
Y <- counts_human_chimp_heart_subset

check_values <- mediate.test.regressing(Y, X, M, RIN_subset)

fit <- vash(check_values$d_se,df=6, singlecomp = T, unimodal = "auto") 
fit_ash <- ash(as.vector(check_values$d), fit$sd.post, mode = 0, mixcompdist = "normal")

calc <- as.array(summary(fit_ash$result$svalue < FDR_level))
calc2 <- as.numeric(calc[[2]])

(nrow(Y)-calc2)/nrow(Y)
## [1] 0.1664581
# Species no DE
species_no_DE <- c(2, 4, 6, 8, 10, 12, 14, 16)

species_no_DE_exprs <- hc_exprs[,species_no_DE]

species_no_DE_methyl <- exprs_methyl[,species_no_DE]

two_species <- samples$Tissue[species_no_DE]
no_DE_tissue <- droplevels.factor(two_species)

no_DE_RIN <- RIN[species_no_DE]

design <- model.matrix(~ as.factor(no_DE_tissue) + no_DE_RIN)
  fit_species_no_DE <- lmFit(cpm.voom.cyclic[,species_no_DE], design)
  fit_species_no_DE <- eBayes(fit_species_no_DE)

  HvC_Heart_fit_species_no_DE = topTable(fit_species_no_DE, coef=2, adjust="BH", number=Inf,
sort.by="none")

# Species DE 
  design_1 <- model.matrix(~ X + RIN_subset)
  fit_all <- lmFit(cpm.voom.cyclic[,human_chimp_heart], design_1)
  fit_all <- eBayes(fit_all)

# Get results
  HvC_Heart_fit_all = topTable(fit_all, coef=2, adjust="BH", number=Inf, sort.by="none")
  
  HvC_Heart_fit_species_no_DE = cbind(HvC_Heart_fit_species_no_DE, HvC_Heart_fit_all)
  
 
      HvC_Heart_fit_all_5perc <- HvC_Heart_fit_species_no_DE[which(HvC_Heart_fit_species_no_DE[,6] <  FDR_level & HvC_Heart_fit_species_no_DE[,13] < FDR_level), ]

  
   human_chimp_heart1 <-  rownames(methyl) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(methyl, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:8]
M <- counts_human_chimp_heart_subset

human_chimp_heart1 <-  rownames(hc_exprs_heart) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(hc_exprs_heart, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:8]
Y <- counts_human_chimp_heart_subset

check_values <- mediate.test.regressing(Y, X, M, RIN_subset)

fit <- vash(check_values$d_se,df=6, singlecomp = T, unimodal = "auto") 
fit_ash <- ash(as.vector(check_values$d), fit$sd.post, mode = 0, mixcompdist = "normal")

ind_siga <- (fit_ash$result$svalue < 0.05)
ind_siga[ind_siga == TRUE] <- "red"
ind_siga[ind_siga == FALSE] <- "black"

combine_dataa <- as.data.frame(cbind(fit_ash$result$betahat, fit_ash$result$sebetahat, ind_siga), stringsAsFactors = FALSE)

combine_dataa[,1] <- as.numeric(combine_dataa[,1])
combine_dataa[,2] <- as.numeric(combine_dataa[,2])
colnames(combine_dataa) <- c("Effect", "SE", "ind_siga")

Signficant conserved kidney-lung in human

# Revisions- run GO
rownames(combine_dataa) <- rownames(Y)


check <- as.numeric(as.factor(combine_dataa$ind_siga))

# Merge ENSG with true/false

test_gene <- as.vector(check)
names(test_gene) <- rownames(Y)

# Run topGO
go_data <- new("topGOdata",
                   ontology = "BP",
                   allGenes = test_gene, 
                    geneSel = function(allScore){
    return(allScore > 1)
},
                   nodeSize = 5,
                   annotationFun = annFUN.org,
                   mapping = "org.Hs.eg.db",
                   ID = "ensembl")
## 
## Building most specific GOs .....
##  ( 2940 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 6579 GO terms and 15016 relations. )
## 
## Annotating nodes ...............
##  ( 627 genes annotated to the GO terms. )
# Perform enrichment test
go_test <- runTest(go_data, algorithm = "weight01", statistic = "fisher")
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 1704 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 15:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 14:  7 nodes to be scored    (0 eliminated genes)
## 
##   Level 13:  22 nodes to be scored   (5 eliminated genes)
## 
##   Level 12:  37 nodes to be scored   (47 eliminated genes)
## 
##   Level 11:  61 nodes to be scored   (132 eliminated genes)
## 
##   Level 10:  114 nodes to be scored  (176 eliminated genes)
## 
##   Level 9:   172 nodes to be scored  (265 eliminated genes)
## 
##   Level 8:   211 nodes to be scored  (352 eliminated genes)
## 
##   Level 7:   279 nodes to be scored  (437 eliminated genes)
## 
##   Level 6:   305 nodes to be scored  (496 eliminated genes)
## 
##   Level 5:   255 nodes to be scored  (543 eliminated genes)
## 
##   Level 4:   149 nodes to be scored  (587 eliminated genes)
## 
##   Level 3:   71 nodes to be scored   (602 eliminated genes)
## 
##   Level 2:   19 nodes to be scored   (611 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (614 eliminated genes)
go_table <- GenTable(go_data, weightFisher = go_test,
                         orderBy = "weightFisher", ranksOf = "weightFisher",
                         topNodes = sum(score(go_test) < .01))

go_table
##        GO.ID                                        Term Annotated
## 1 GO:0030858 positive regulation of epithelial cell d...         5
## 2 GO:0019933                     cAMP-mediated signaling         5
## 3 GO:0035556           intracellular signal transduction       127
##   Significant Expected weightFisher
## 1           4     0.93       0.0048
## 2           4     0.93       0.0048
## 3          32    23.50       0.0095

Human liver-lung

# Check parameters

human_chimp_heart <- c(18, 19, 22, 23, 26, 27, 30, 31)

# Methylation
hc_exprs <- exprs[,2:48]
exprs_methyl <- exprs[,49:79]

# HC heart only


hc_exprs_heart <- hc_exprs[,human_chimp_heart]


methyl <- exprs_methyl[,human_chimp_heart]
rownames(hc_exprs_heart) <- rownames(exprs)
rownames(methyl) <- rownames(exprs)


Y <- hc_exprs_heart
two_species <- samples$Tissue[human_chimp_heart]
X <- droplevels.factor(two_species)
M <- methyl
RIN_subset <- RIN[human_chimp_heart]



check_values <- mediate.test.regressing(Y, X, M, RIN_subset)
fit <- vash(check_values$d_se,df=6, singlecomp = T, unimodal = "auto") 
fit_ash_normal <- ash(as.vector(check_values$d), fit$sd.post, mode = 0, mixcompdist = "normal")
summary(fit_ash_normal$result$svalue < 0.05)
##    Mode   FALSE    TRUE 
## logical    7649      76
# Run the linear model in limma
  design_1 <- model.matrix(~ X + RIN_subset)
  fit_all <- lmFit(cpm.voom.cyclic[,human_chimp_heart], design_1)
  fit_all <- eBayes(fit_all)

# Get results
  HvC_Heart_fit_all = topTable(fit_all, coef=2, adjust="BH", number=Inf, sort.by="none")
  HvC_Heart_fit_all_5perc <-
HvC_Heart_fit_all[which(HvC_Heart_fit_all$adj.P.Val < FDR_level), ]
  
 human_chimp_heart1 <-  rownames(methyl) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(methyl, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:8]
M <- counts_human_chimp_heart_subset

human_chimp_heart1 <-  rownames(hc_exprs_heart) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(hc_exprs_heart, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:8]
Y <- counts_human_chimp_heart_subset

check_values <- mediate.test.regressing(Y, X, M, RIN_subset)

fit <- vash(check_values$d_se,df=6, singlecomp = T, unimodal = "auto") 
fit_ash <- ash(as.vector(check_values$d), fit$sd.post, mode = 0, mixcompdist = "normal")

calc <- as.array(summary(fit_ash$result$svalue < FDR_level))
calc2 <- as.numeric(calc[[2]])

(nrow(Y)-calc2)/nrow(Y)
## [1] 0.118798
# Species no DE
species_no_DE <- c(3, 4, 7, 8, 11, 12, 15, 16)

species_no_DE_exprs <- hc_exprs[,species_no_DE]

species_no_DE_methyl <- exprs_methyl[,species_no_DE]

two_species <- samples$Tissue[species_no_DE]
no_DE_tissue <- droplevels.factor(two_species)

no_DE_RIN <- RIN[species_no_DE]

design <- model.matrix(~ as.factor(no_DE_tissue) + no_DE_RIN)
  fit_species_no_DE <- lmFit(cpm.voom.cyclic[,species_no_DE], design)
  fit_species_no_DE <- eBayes(fit_species_no_DE)

  HvC_Heart_fit_species_no_DE = topTable(fit_species_no_DE, coef=2, adjust="BH", number=Inf,
sort.by="none")

# Species DE 
  design_1 <- model.matrix(~ X + RIN_subset)
  fit_all <- lmFit(cpm.voom.cyclic[,human_chimp_heart], design_1)
  fit_all <- eBayes(fit_all)

# Get results
  HvC_Heart_fit_all = topTable(fit_all, coef=2, adjust="BH", number=Inf, sort.by="none")
  
  HvC_Heart_fit_species_no_DE = cbind(HvC_Heart_fit_species_no_DE, HvC_Heart_fit_all)
  
 
      HvC_Heart_fit_all_5perc <- HvC_Heart_fit_species_no_DE[which(HvC_Heart_fit_species_no_DE[,6] <  FDR_level & HvC_Heart_fit_species_no_DE[,13] < FDR_level), ]

  
   human_chimp_heart1 <-  rownames(methyl) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(methyl, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:8]
M <- counts_human_chimp_heart_subset

human_chimp_heart1 <-  rownames(hc_exprs_heart) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(hc_exprs_heart, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:8]
Y <- counts_human_chimp_heart_subset

check_values <- mediate.test.regressing(Y, X, M, RIN_subset)

fit <- vash(check_values$d_se,df=6, singlecomp = T, unimodal = "auto") 
fit_ash <- ash(as.vector(check_values$d), fit$sd.post, mode = 0, mixcompdist = "normal")

ind_siga <- (fit_ash$result$svalue < 0.05)
ind_siga[ind_siga == TRUE] <- "red"
ind_siga[ind_siga == FALSE] <- "black"

combine_dataa <- as.data.frame(cbind(fit_ash$result$betahat, fit_ash$result$sebetahat, ind_siga), stringsAsFactors = FALSE)

combine_dataa[,1] <- as.numeric(combine_dataa[,1])
combine_dataa[,2] <- as.numeric(combine_dataa[,2])
colnames(combine_dataa) <- c("Effect", "SE", "ind_siga")

Signficant conserved liver-lung in human

# Revisions- run GO
rownames(combine_dataa) <- rownames(Y)


check <- as.numeric(as.factor(combine_dataa$ind_siga))

# Merge ENSG with true/false

test_gene <- as.vector(check)
names(test_gene) <- rownames(Y)

# Run topGO
go_data <- new("topGOdata",
                   ontology = "BP",
                   allGenes = test_gene, 
                    geneSel = function(allScore){
    return(allScore > 1)
},
                   nodeSize = 5,
                   annotationFun = annFUN.org,
                   mapping = "org.Hs.eg.db",
                   ID = "ensembl")
## 
## Building most specific GOs .....
##  ( 3885 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 7600 GO terms and 17347 relations. )
## 
## Annotating nodes ...............
##  ( 1120 genes annotated to the GO terms. )
# Perform enrichment test
go_test <- runTest(go_data, algorithm = "weight01", statistic = "fisher")
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 2287 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 15:  3 nodes to be scored    (0 eliminated genes)
## 
##   Level 14:  12 nodes to be scored   (0 eliminated genes)
## 
##   Level 13:  33 nodes to be scored   (19 eliminated genes)
## 
##   Level 12:  58 nodes to be scored   (81 eliminated genes)
## 
##   Level 11:  107 nodes to be scored  (229 eliminated genes)
## 
##   Level 10:  182 nodes to be scored  (338 eliminated genes)
## 
##   Level 9:   262 nodes to be scored  (489 eliminated genes)
## 
##   Level 8:   318 nodes to be scored  (690 eliminated genes)
## 
##   Level 7:   380 nodes to be scored  (831 eliminated genes)
## 
##   Level 6:   382 nodes to be scored  (949 eliminated genes)
## 
##   Level 5:   290 nodes to be scored  (1016 eliminated genes)
## 
##   Level 4:   164 nodes to be scored  (1070 eliminated genes)
## 
##   Level 3:   76 nodes to be scored   (1088 eliminated genes)
## 
##   Level 2:   19 nodes to be scored   (1095 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (1104 eliminated genes)
go_table <- GenTable(go_data, weightFisher = go_test,
                         orderBy = "weightFisher", ranksOf = "weightFisher",
                         topNodes = sum(score(go_test) < .01))

go_table
##         GO.ID                                        Term Annotated
## 1  GO:0002576                      platelet degranulation        26
## 2  GO:0010951 negative regulation of endopeptidase act...        34
## 3  GO:0032374         regulation of cholesterol transport        11
## 4  GO:0045087                      innate immune response        65
## 5  GO:0007597        blood coagulation, intrinsic pathway         9
## 6  GO:0051004 regulation of lipoprotein lipase activit...         9
## 7  GO:0048146 positive regulation of fibroblast prolif...         6
## 8  GO:0032722 positive regulation of chemokine product...         6
## 9  GO:0010035             response to inorganic substance        62
## 10 GO:0042632                     cholesterol homeostasis        21
## 11 GO:0006953                        acute-phase response        14
## 12 GO:0006641              triglyceride metabolic process        19
## 13 GO:0044273           sulfur compound catabolic process         7
## 14 GO:0046470       phosphatidylcholine metabolic process         7
## 15 GO:0001960 negative regulation of cytokine-mediated...         7
## 16 GO:0030194    positive regulation of blood coagulation         7
## 17 GO:0034372 very-low-density lipoprotein particle re...         7
## 18 GO:0051917                  regulation of fibrinolysis         7
## 19 GO:0002673 regulation of acute inflammatory respons...        19
## 20 GO:0070613            regulation of protein processing        19
## 21 GO:0034381       plasma lipoprotein particle clearance        10
## 22 GO:1902106 negative regulation of leukocyte differe...         9
## 23 GO:0003014                        renal system process        15
##    Significant Expected weightFisher
## 1           11     3.53      0.00025
## 2           13     4.61      0.00109
## 3            5     1.49      0.00146
## 4           17     8.82      0.00209
## 5            5     1.22      0.00345
## 6            5     1.22      0.00345
## 7            4     0.81      0.00393
## 8            4     0.81      0.00393
## 9           11     8.41      0.00399
## 10           8     2.85      0.00410
## 11           6     1.90      0.00668
## 12          10     2.58      0.00736
## 13           4     0.95      0.00821
## 14           4     0.95      0.00821
## 15           4     0.95      0.00821
## 16           4     0.95      0.00821
## 17           4     0.95      0.00821
## 18           4     0.95      0.00821
## 19           8     2.58      0.00834
## 20           8     2.58      0.00834
## 21           5     1.36      0.00864
## 22           4     1.22      0.00879
## 23           6     2.04      0.00991