# Load libraries
library("edgeR")
## Loading required package: limma
library("limma")
library("plyr")
library("ashr")
## Warning: package 'ashr' was built under R version 3.4.4
library("cowplot")
## Warning: package 'cowplot' was built under R version 3.4.4
## Loading required package: ggplot2
## 
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggplot2':
## 
##     ggsave
library("vashr")
## Loading required package: SQUAREM
## Loading required package: qvalue
library("devtools")
#devtools::install_github("jhsiao999/mediation/pkg")
library("medinome")

Human-chimp heart- DE genes

# import sample labels
samples <- read.delim("../../../Reg_Evo_Primates/data/Sample_info_RNAseq_RIN.txt")
RIN <- samples$RIN
# expression
exprs <- read.table("../../../Reg_Evo_Primates/data/human_chimp_orth_exp_methyl_7725_hum.txt", sep="")
# methylation data
methyl <- read.csv("../../../Reg_Evo_Primates/data/chimp_human_orth_7725_avg_methyl_per_ts_gene.txt", sep="")

# Normalized gene expression data
cpm.voom.cyclic <- readRDS("../../../Reg_Evo_Primates/data/human_chimp_orth_cpm_voom_cyclic.rds")

Set FDR level/svalue

FDR_level <- 0.05

Human-chimp heart

# Check parameters

human_chimp_heart <- c(1, 5, 9, 13, 20, 24, 28)

# Methylation
hc_exprs <- exprs[,2:48]
exprs_methyl <- exprs[,49:79]

# HC heart only


hc_exprs_heart <- hc_exprs[,human_chimp_heart]


methyl <- exprs_methyl[,human_chimp_heart]



Y <- hc_exprs_heart
two_species <- samples$Species[human_chimp_heart]
X <- droplevels.factor(two_species)
M <- methyl
RIN_subset <- RIN[human_chimp_heart]



check_values <- mediate.test.regressing(Y, X, M, RIN_subset)
## Warning: package 'assertthat' was built under R version 3.4.4
fit <- vash(check_values$d_se,df=5, singlecomp = T, unimodal = "auto") 
fit_ash_normal <- ash(as.vector(check_values$d), fit$sd.post, mode = 0, mixcompdist = "normal")
summary(fit_ash_normal$result$svalue < 0.05)
##    Mode   FALSE    TRUE 
## logical    7688      37
# Run the linear model in limma
  design_1 <- model.matrix(~ X + RIN_subset)
  fit_all <- lmFit(cpm.voom.cyclic[,human_chimp_heart], design_1)
  fit_all <- eBayes(fit_all)

# Get results
  HvC_Heart_fit_all = topTable(fit_all, coef=2, adjust="BH", number=Inf, sort.by="none")
  HvC_Heart_fit_all_5perc <-
HvC_Heart_fit_all[which(HvC_Heart_fit_all$adj.P.Val < FDR_level), ]
  
 human_chimp_heart1 <-  rownames(methyl) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(methyl, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:7]
M <- counts_human_chimp_heart_subset

human_chimp_heart1 <-  rownames(hc_exprs_heart) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(hc_exprs_heart, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:7]
Y <- counts_human_chimp_heart_subset

check_values <- mediate.test.regressing(Y, X, M, RIN_subset)

fit <- vash(check_values$d_se,df=5, singlecomp = T, unimodal = "auto") 
fit_ash <- ash(as.vector(check_values$d), fit$sd.post, mode = 0, mixcompdist = "normal")

calc <- as.array(summary(fit_ash$result$svalue < FDR_level))
calc2 <- as.numeric(calc[[2]])

(nrow(Y)-calc2)/nrow(Y)
## [1] 0.1141498
ind_siga <- (fit_ash$result$svalue < 0.05)
ind_siga[ind_siga == TRUE] <- "red"
ind_siga[ind_siga == FALSE] <- "black"

combine_dataa <- as.data.frame(cbind(fit_ash$result$betahat, fit_ash$result$sebetahat, ind_siga), stringsAsFactors = FALSE)

combine_dataa[,1] <- as.numeric(combine_dataa[,1])
combine_dataa[,2] <- as.numeric(combine_dataa[,2])
colnames(combine_dataa) <- c("Effect", "SE", "ind_siga")

hr_dea <- ggplot(combine_dataa, aes(combine_dataa$Effect, combine_dataa$SE)) + geom_point(color = c(combine_dataa$ind_siga)) + ylab("Standard error") + xlab("Effect size difference") + ylim(0, 6.5) + xlim(-8, 8)


# Revisions- subset to significant values and look at the sign of the effect size

subset_red <- combine_dataa[which(combine_dataa$ind_siga == "red"), ]
summary(subset_red$Effect < 0)
##    Mode   FALSE    TRUE 
## logical      60      36

Human-chimp heart- non DE genes

# Run the linear model in limma
  design_1 <- model.matrix(~ X + RIN_subset)
  fit_all <- lmFit(cpm.voom.cyclic[,human_chimp_heart], design_1)
  fit_all <- eBayes(fit_all)

# Get results
  HvC_Heart_fit_all = topTable(fit_all, coef=2, adjust="BH", number=Inf, sort.by="none")
  HvC_Heart_fit_all_5perc <-
HvC_Heart_fit_all[which(HvC_Heart_fit_all$adj.P.Val > FDR_level), ]
  
 human_chimp_heart1 <-  rownames(methyl) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(methyl, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:7]
M <- counts_human_chimp_heart_subset

human_chimp_heart1 <-  rownames(hc_exprs_heart) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(hc_exprs_heart, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:7]
Y <- counts_human_chimp_heart_subset

check_values <- mediate.test.regressing(Y, X, M, RIN_subset)

fit <- vash(check_values$d_se,df=5, singlecomp = T, unimodal = "auto") 
fit_ash <- ash(as.vector(check_values$d), fit$sd.post, mode = 0, mixcompdist = "normal")

calc <- as.array(summary(fit_ash$result$svalue < FDR_level))
calc2 <- as.numeric(calc[[2]])

(nrow(Y)-calc2)/nrow(Y)
## [1] 0
ind_sigb <- (fit_ash$result$svalue < 0.05)
ind_sigb[ind_sigb == TRUE] <- "red"
ind_sigb[ind_sigb == FALSE] <- "black"

combine_datab <- as.data.frame(cbind(fit_ash$result$betahat, fit_ash$result$sebetahat, ind_sigb), stringsAsFactors = FALSE)

combine_datab[,1] <- as.numeric(combine_datab[,1])
combine_datab[,2] <- as.numeric(combine_datab[,2])
colnames(combine_datab) <- c("Effect", "SE", "ind_sigb")

hr_deb <- ggplot(combine_datab, aes(combine_datab$Effect, combine_datab$SE)) + geom_point(color = c(combine_datab$ind_sigb)) + ylab("Standard error") + xlab("Effect size difference") + ylim(0, 6.5) + xlim(-8, 8)

Human-rhesus heart- DE genes

# import sample labels
samples <- read.delim("../../../Reg_Evo_Primates/data/Sample_info_RNAseq_RIN.txt")
RIN <- samples$RIN
# expression
exprs <- read.table("../../../Reg_Evo_Primates/data/250_exp_avg_methyl_hcr_4155_genes.txt", sep="")


# Normalized gene expression data
cpm.voom.cyclic <- readRDS("../../../Reg_Evo_Primates/data/human_chimp_orth_cpm_voom_cyclic.rds")
# Check parameters

human_chimp_heart <- c(32, 36, 40, 44, 20, 24, 28)

# Methylation
hc_exprs <- exprs[,2:48]
exprs_methyl <- exprs[,49:95]

# HC heart only


hc_exprs_heart <- hc_exprs[,human_chimp_heart]
rownames(hc_exprs_heart) <- exprs[,1]

methyl <- exprs_methyl[,human_chimp_heart]
rownames(methyl) <- exprs[,1]


Y <- hc_exprs_heart
two_species <- samples$Species[human_chimp_heart]
X <- droplevels.factor(two_species)
M <- methyl
RIN_subset <- RIN[human_chimp_heart]



check_values <- mediate.test.regressing(Y, X, M, RIN_subset)
fit <- vash(check_values$d_se,df=5, singlecomp = T, unimodal = "auto") 
fit_ash_normal <- ash(as.vector(check_values$d), fit$sd.post, mode = 0, mixcompdist = "normal")
summary(fit_ash_normal$result$svalue < 0.05)
##    Mode   FALSE    TRUE 
## logical    3889     266
# Run the linear model in limma
  design_1 <- model.matrix(~ X + RIN_subset)
  fit_all <- lmFit(cpm.voom.cyclic[,human_chimp_heart], design_1)
  fit_all <- eBayes(fit_all)

# Get results
  HvC_Heart_fit_all = topTable(fit_all, coef=2, adjust="BH", number=Inf, sort.by="none")
  HvC_Heart_fit_all_5perc <-
HvC_Heart_fit_all[which(HvC_Heart_fit_all$adj.P.Val < FDR_level), ]
  
 human_chimp_heart1 <-  rownames(methyl) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(methyl, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:7]
M <- counts_human_chimp_heart_subset

human_chimp_heart1 <-  rownames(hc_exprs_heart) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(hc_exprs_heart, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:7]
Y <- counts_human_chimp_heart_subset

check_values <- mediate.test.regressing(Y, X, M, RIN_subset)

fit <- vash(check_values$d_se,df=5, singlecomp = T, unimodal = "auto") 
fit_ash <- ash(as.vector(check_values$d), fit$sd.post, mode = 0, mixcompdist = "normal")

calc <- as.array(summary(fit_ash$result$svalue < FDR_level))
calc2 <- as.numeric(calc[[2]])

(nrow(Y)-calc2)/nrow(Y)
## [1] 0.3378995
ind_sig <- (fit_ash$result$svalue < 0.05)
ind_sig[ind_sig == TRUE] <- "red"
ind_sig[ind_sig == FALSE] <- "black"

combine_data <- as.data.frame(cbind(fit_ash$result$betahat, fit_ash$result$sebetahat, ind_sig), stringsAsFactors = FALSE)

combine_data[,1] <- as.numeric(combine_data[,1])
combine_data[,2] <- as.numeric(combine_data[,2])
colnames(combine_data) <- c("Effect", "SE", "ind_sigc")

hr_dec <- ggplot(combine_data, aes(combine_data$Effect, combine_data$SE)) + geom_point(color = c(combine_data$ind_sigc)) + ylab("Standard error") + xlab("Effect size difference") + ylim(0, 6.5) + xlim(-8, 8)

# Revisions- subset to significant values and look at the sign of the effect size

subset_red <- combine_data[which(combine_data$ind_sigc == "red"), ]
summary(subset_red$Effect > 0)
##    Mode   FALSE    TRUE 
## logical     184     186

Human versus rhesus heart- non DE genes

# Run the linear model in limma
  design_1 <- model.matrix(~ X + RIN_subset)
  fit_all <- lmFit(cpm.voom.cyclic[,human_chimp_heart], design_1)
  fit_all <- eBayes(fit_all)

# Get results
  HvC_Heart_fit_all = topTable(fit_all, coef=2, adjust="BH", number=Inf, sort.by="none")
  HvC_Heart_fit_all_5perc <-
HvC_Heart_fit_all[which(HvC_Heart_fit_all$adj.P.Val > FDR_level), ]
  
 human_chimp_heart1 <-  rownames(methyl) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(methyl, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:7]
M <- counts_human_chimp_heart_subset

human_chimp_heart1 <-  rownames(hc_exprs_heart) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(hc_exprs_heart, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:7]
Y <- counts_human_chimp_heart_subset

check_values <- mediate.test.regressing(Y, X, M, RIN_subset)

fit <- vash(check_values$d_se,df=5, singlecomp = T, unimodal = "auto") 
fit_ash <- ash(as.vector(check_values$d), fit$sd.post, mode = 0, mixcompdist = "normal")

calc <- as.array(summary(fit_ash$result$svalue < FDR_level))
calc2 <- as.numeric(calc[[2]])

(nrow(Y)-calc2)/nrow(Y)
## [1] 0.005228758
ind_sigd <- (fit_ash$result$svalue < 0.05)
ind_sigd[ind_sigd == TRUE] <- "red"
ind_sigd[ind_sigd == FALSE] <- "black"

combine_datad <- as.data.frame(cbind(fit_ash$result$betahat, fit_ash$result$sebetahat, ind_sigd), stringsAsFactors = FALSE)

combine_datad[,1] <- as.numeric(combine_datad[,1])
combine_datad[,2] <- as.numeric(combine_datad[,2])
colnames(combine_datad) <- c("Effect", "SE", "ind_sigd")

hr_ded <- ggplot(combine_datad, aes(combine_datad$Effect, combine_datad$SE)) + geom_point(color = c(combine_datad$ind_sigd)) + ylab("Standard error") + xlab("Effect size difference") + ylim(0, 6.5) + xlim(-8, 8)

Human-chimp percents

# Make a table



comparison = data.frame(c("Heart", "Heart", "Kidney", "Kidney",  "Liver", "Liver", "Lung", "Lung"))
genes =  data.frame(c("DE", "non-DE", "DE", "non-DE", "DE", "non-DE", "DE", "non-DE"))

percentage = data.frame(c(0.114, 0, 0.122, 0, 0.256, 0, 0.1155, 0))



df <- cbind(comparison, genes, percentage)
colnames(df) <- c("Tissue", "Gene_type", "Percentage")
#df$Tissue_comparison <- factor(df$Tissue, levels = df$Tissue )


plot_interspecies_hc <- ggplot(data=df, aes(x=Gene_type, y=Percentage)) +
  geom_bar(stat="identity", fill = as.numeric(df$Gene_type)) + facet_wrap(~ df$Tissue) + xlab("Human-chimpanzee comparison") + ylab("% of genes for which \n DNAm could underlie \n gene expression") + theme(strip.text.x = element_text(face = "bold", size = 5.5)) + ylim(0, 0.42)

Human-rhesus percentages

# Make a table



comparison1 = data.frame(c("Heart", "Heart", "Kidney", "Kidney",  "Liver", "Liver", "Lung", "Lung"))
genes1 =  data.frame(c("DE", "non-DE", "DE", "non-DE", "DE", "non-DE", "DE", "non-DE"))

percentage1 = data.frame(c(0.337, 0.005, 0.333, 0.0059, 0.402, 0.00877, 0.215, 0.00029))



df1 <- cbind(comparison1, genes1, percentage1)
colnames(df1) <- c("Tissue", "Gene_type", "Percentage")
#df$Tissue_comparison <- factor(df$Tissue, levels = df$Tissue )


plot_interspecies_hr <- ggplot(data=df1, aes(x=Gene_type, y=Percentage)) +
  geom_bar(stat="identity", fill = as.numeric(df1$Gene_type)) + facet_wrap(~ df1$Tissue) + xlab("Human-rhesus comparison") + ylab("% of genes for which \n DNAm could underlie \n gene expression") + theme(strip.text.x = element_text(size = 5.5, face = "bold")) + ylim(0, 0.42)

Make into figure

eight_plots <- plot_grid(hr_dea, hr_dec, hr_deb, hr_ded, plot_interspecies_hc, plot_interspecies_hr, ncol = 2)
plot_fig4 <- plot_grid(eight_plots, ncol = 1, rel_heights=c(0.1, 1))

save_plot("/Users/laurenblake/Desktop/Tissue_Revisions/Figure_4_no_labels.pdf", plot_fig4,
           ncol = 2, # we're saving a grid plot of 2 columns
           nrow = 3, # and 2 rows
           # each individual subplot should have an aspect ratio of 1.3
           base_aspect_ratio = 1
           )

Get additional numbers for the text

Human-chimp kidney

# import sample labels
samples <- read.delim("../../../Reg_Evo_Primates/data/Sample_info_RNAseq_RIN.txt")
RIN <- samples$RIN
# expression
exprs <- read.table("../../../Reg_Evo_Primates/data/human_chimp_orth_exp_methyl_7725_hum.txt", sep="")
# methylation data
methyl <- read.csv("../../../Reg_Evo_Primates/data/chimp_human_orth_7725_avg_methyl_per_ts_gene.txt", sep="")

# Normalized gene expression data
cpm.voom.cyclic <- readRDS("../../../Reg_Evo_Primates/data/human_chimp_orth_cpm_voom_cyclic.rds")
# Check parameters

human_chimp_heart <- c(2, 6, 10, 14, 17, 21, 25, 29)

# Methylation
hc_exprs <- exprs[,2:48]
exprs_methyl <- exprs[,49:79]

# HC heart only


hc_exprs_heart <- hc_exprs[,human_chimp_heart]


methyl <- exprs_methyl[,human_chimp_heart]



Y <- hc_exprs_heart
two_species <- samples$Species[human_chimp_heart]
X <- droplevels.factor(two_species)
M <- methyl
RIN_subset <- RIN[human_chimp_heart]
# Run the linear model in limma
  design_1 <- model.matrix(~ X + RIN_subset)
  fit_all <- lmFit(cpm.voom.cyclic[,human_chimp_heart], design_1)
  fit_all <- eBayes(fit_all)

# Get results
  HvC_Heart_fit_all = topTable(fit_all, coef=2, adjust="BH", number=Inf, sort.by="none")
  HvC_Heart_fit_all_5perc <-
HvC_Heart_fit_all[which(HvC_Heart_fit_all$adj.P.Val < FDR_level), ]
  
 human_chimp_heart1 <-  rownames(methyl) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(methyl, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:8]
M <- counts_human_chimp_heart_subset

human_chimp_heart1 <-  rownames(hc_exprs_heart) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(hc_exprs_heart, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:8]
Y <- counts_human_chimp_heart_subset

check_values <- mediate.test.regressing(Y, X, M, RIN_subset)

fit <- vash(check_values$d_se,df=6, singlecomp = T, unimodal = "auto") 
fit_ash <- ash(as.vector(check_values$d), fit$sd.post, mode = 0, mixcompdist = "normal")

calc <- as.array(summary(fit_ash$result$svalue < FDR_level))
calc2 <- as.numeric(calc[[2]])

(nrow(Y)-calc2)/nrow(Y)
## [1] 0.1222571
ind_siga <- (fit_ash$result$svalue < 0.05)
ind_siga[ind_siga == TRUE] <- "red"
ind_siga[ind_siga == FALSE] <- "black"

combine_dataa <- as.data.frame(cbind(fit_ash$result$betahat, fit_ash$result$sebetahat, ind_siga), stringsAsFactors = FALSE)

combine_dataa[,1] <- as.numeric(combine_dataa[,1])
combine_dataa[,2] <- as.numeric(combine_dataa[,2])
colnames(combine_dataa) <- c("Effect", "SE", "ind_siga")

#hr_dea <- ggplot(combine_dataa, aes(combine_dataa$Effect, combine_dataa$SE)) + geom_point(color = c(combine_dataa$ind_siga)) + ylab("Standard error") + xlab("Effect size difference") + ylim(0, 6.5) + xlim(-8, 8)


# Revisions- subset to significant values and look at the sign of the effect size

subset_red <- combine_dataa[which(combine_dataa$ind_siga == "red"), ]
summary(subset_red$Effect > 0)
##    Mode   FALSE    TRUE 
## logical      11      28

Human-chimp liver

# Check parameters

human_chimp_heart <- c(3, 7, 11, 15, 18, 22, 26, 30)

# Methylation
hc_exprs <- exprs[,2:48]
exprs_methyl <- exprs[,49:79]

# HC heart only


hc_exprs_heart <- hc_exprs[,human_chimp_heart]


methyl <- exprs_methyl[,human_chimp_heart]



Y <- hc_exprs_heart
two_species <- samples$Species[human_chimp_heart]
X <- droplevels.factor(two_species)
M <- methyl
RIN_subset <- RIN[human_chimp_heart]
# Run the linear model in limma
  design_1 <- model.matrix(~ X + RIN_subset)
  fit_all <- lmFit(cpm.voom.cyclic[,human_chimp_heart], design_1)
  fit_all <- eBayes(fit_all)

# Get results
  HvC_Heart_fit_all = topTable(fit_all, coef=2, adjust="BH", number=Inf, sort.by="none")
  HvC_Heart_fit_all_5perc <-
HvC_Heart_fit_all[which(HvC_Heart_fit_all$adj.P.Val < FDR_level), ]
  
 human_chimp_heart1 <-  rownames(methyl) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(methyl, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:8]
M <- counts_human_chimp_heart_subset

human_chimp_heart1 <-  rownames(hc_exprs_heart) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(hc_exprs_heart, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:8]
Y <- counts_human_chimp_heart_subset

check_values <- mediate.test.regressing(Y, X, M, RIN_subset)

fit <- vash(check_values$d_se,df=6, singlecomp = T, unimodal = "auto") 
fit_ash <- ash(as.vector(check_values$d), fit$sd.post, mode = 0, mixcompdist = "normal")

calc <- as.array(summary(fit_ash$result$svalue < FDR_level))
calc2 <- as.numeric(calc[[2]])

(nrow(Y)-calc2)/nrow(Y)
## [1] 0.2562814
ind_siga <- (fit_ash$result$svalue < 0.05)
ind_siga[ind_siga == TRUE] <- "red"
ind_siga[ind_siga == FALSE] <- "black"

combine_dataa <- as.data.frame(cbind(fit_ash$result$betahat, fit_ash$result$sebetahat, ind_siga), stringsAsFactors = FALSE)

combine_dataa[,1] <- as.numeric(combine_dataa[,1])
combine_dataa[,2] <- as.numeric(combine_dataa[,2])
colnames(combine_dataa) <- c("Effect", "SE", "ind_siga")

#hr_dea <- ggplot(combine_dataa, aes(combine_dataa$Effect, combine_dataa$SE)) + geom_point(color = c(combine_dataa$ind_siga)) + ylab("Standard error") + xlab("Effect size difference") + ylim(0, 6.5) + xlim(-8, 8)


# Revisions- subset to significant values and look at the sign of the effect size

subset_red <- combine_dataa[which(combine_dataa$ind_siga == "red"), ]
summary(subset_red$Effect > 0)
##    Mode   FALSE    TRUE 
## logical      15      36

Human-chimp lung

# Check parameters

human_chimp_heart <- c(4, 8, 12, 16, 19, 23, 27, 31)

# Methylation
hc_exprs <- exprs[,2:48]
exprs_methyl <- exprs[,49:79]

# HC heart only


hc_exprs_heart <- hc_exprs[,human_chimp_heart]


methyl <- exprs_methyl[,human_chimp_heart]



Y <- hc_exprs_heart
two_species <- samples$Species[human_chimp_heart]
X <- droplevels.factor(two_species)
M <- methyl
RIN_subset <- RIN[human_chimp_heart]
# Run the linear model in limma
  design_1 <- model.matrix(~ X + RIN_subset)
  fit_all <- lmFit(cpm.voom.cyclic[,human_chimp_heart], design_1)
  fit_all <- eBayes(fit_all)

# Get results
  HvC_Heart_fit_all = topTable(fit_all, coef=2, adjust="BH", number=Inf, sort.by="none")
  HvC_Heart_fit_all_5perc <-
HvC_Heart_fit_all[which(HvC_Heart_fit_all$adj.P.Val < FDR_level), ]
  
 human_chimp_heart1 <-  rownames(methyl) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(methyl, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:8]
M <- counts_human_chimp_heart_subset

human_chimp_heart1 <-  rownames(hc_exprs_heart) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(hc_exprs_heart, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:8]
Y <- counts_human_chimp_heart_subset

check_values <- mediate.test.regressing(Y, X, M, RIN_subset)

fit <- vash(check_values$d_se,df=6, singlecomp = T, unimodal = "auto") 
fit_ash <- ash(as.vector(check_values$d), fit$sd.post, mode = 0, mixcompdist = "normal")

calc <- as.array(summary(fit_ash$result$svalue < FDR_level))
calc2 <- as.numeric(calc[[2]])

(nrow(Y)-calc2)/nrow(Y)
## [1] 0.1155378
ind_siga <- (fit_ash$result$svalue < 0.05)
ind_siga[ind_siga == TRUE] <- "red"
ind_siga[ind_siga == FALSE] <- "black"

combine_dataa <- as.data.frame(cbind(fit_ash$result$betahat, fit_ash$result$sebetahat, ind_siga), stringsAsFactors = FALSE)

combine_dataa[,1] <- as.numeric(combine_dataa[,1])
combine_dataa[,2] <- as.numeric(combine_dataa[,2])
colnames(combine_dataa) <- c("Effect", "SE", "ind_siga")

#hr_dea <- ggplot(combine_dataa, aes(combine_dataa$Effect, combine_dataa$SE)) + geom_point(color = c(combine_dataa$ind_siga)) + ylab("Standard error") + xlab("Effect size difference") + ylim(0, 6.5) + xlim(-8, 8)


# Revisions- subset to significant values and look at the sign of the effect size

subset_red <- combine_dataa[which(combine_dataa$ind_siga == "red"), ]
summary(subset_red$Effect > 0)
##    Mode   FALSE    TRUE 
## logical       6      23

Human-rhesus heart- DE genes

# import sample labels
samples <- read.delim("../../../Reg_Evo_Primates/data/Sample_info_RNAseq_RIN.txt")
RIN <- samples$RIN
# expression
exprs <- read.table("../../../Reg_Evo_Primates/data/250_exp_avg_methyl_hcr_4155_genes.txt", sep="")


# Normalized gene expression data
cpm.voom.cyclic <- readRDS("../../../Reg_Evo_Primates/data/human_chimp_orth_cpm_voom_cyclic.rds")
# Check parameters

human_chimp_heart <- c(32, 36, 40, 44, 20, 24, 28)

# Methylation
hc_exprs <- exprs[,2:48]
exprs_methyl <- exprs[,49:95]

# HC heart only


hc_exprs_heart <- hc_exprs[,human_chimp_heart]
rownames(hc_exprs_heart) <- exprs[,1]

methyl <- exprs_methyl[,human_chimp_heart]
rownames(methyl) <- exprs[,1]


Y <- hc_exprs_heart
two_species <- samples$Species[human_chimp_heart]
X <- droplevels.factor(two_species)
M <- methyl
RIN_subset <- RIN[human_chimp_heart]



check_values <- mediate.test.regressing(Y, X, M, RIN_subset)
fit <- vash(check_values$d_se,df=6, singlecomp = T, unimodal = "auto") 
fit_ash_normal <- ash(as.vector(check_values$d), fit$sd.post, mode = 0, mixcompdist = "normal")
summary(fit_ash_normal$result$svalue < 0.05)
##    Mode   FALSE    TRUE 
## logical    3885     270
# Run the linear model in limma
  design_1 <- model.matrix(~ X + RIN_subset)
  fit_all <- lmFit(cpm.voom.cyclic[,human_chimp_heart], design_1)
  fit_all <- eBayes(fit_all)

# Get results
  HvC_Heart_fit_all = topTable(fit_all, coef=2, adjust="BH", number=Inf, sort.by="none")
  HvC_Heart_fit_all_5perc <-
HvC_Heart_fit_all[which(HvC_Heart_fit_all$adj.P.Val < FDR_level), ]
  
 human_chimp_heart1 <-  rownames(methyl) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(methyl, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:7]
M <- counts_human_chimp_heart_subset

human_chimp_heart1 <-  rownames(hc_exprs_heart) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(hc_exprs_heart, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:7]
Y <- counts_human_chimp_heart_subset

check_values <- mediate.test.regressing(Y, X, M, RIN_subset)

fit <- vash(check_values$d_se,df=6, singlecomp = T, unimodal = "auto") 
fit_ash <- ash(as.vector(check_values$d), fit$sd.post, mode = 0, mixcompdist = "normal")

calc <- as.array(summary(fit_ash$result$svalue < FDR_level))
calc2 <- as.numeric(calc[[2]])

(nrow(Y)-calc2)/nrow(Y)
## [1] 0.3388128
ind_sig <- (fit_ash$result$svalue < 0.05)
ind_sig[ind_sig == TRUE] <- "red"
ind_sig[ind_sig == FALSE] <- "black"

combine_data <- as.data.frame(cbind(fit_ash$result$betahat, fit_ash$result$sebetahat, ind_sig), stringsAsFactors = FALSE)

combine_data[,1] <- as.numeric(combine_data[,1])
combine_data[,2] <- as.numeric(combine_data[,2])
colnames(combine_data) <- c("Effect", "SE", "ind_sigc")

hr_dec <- ggplot(combine_data, aes(combine_data$Effect, combine_data$SE)) + geom_point(color = c(combine_data$ind_sigc)) + ylab("Standard error") + xlab("Effect size difference") + ylim(0, 6.5) + xlim(-8, 8)


# Revisions- subset to significant values and look at the sign of the effect size

subset_red <- combine_data[which(combine_data$ind_sigc == "red"), ]
summary(subset_red$Effect > 0)
##    Mode   FALSE    TRUE 
## logical     185     186

Human-rhesus kidney

# Check parameters

human_chimp_heart <- c(33, 37, 41, 45, 17, 21, 25, 29)

# Methylation
hc_exprs <- exprs[,2:48]
exprs_methyl <- exprs[,49:95]

# HC heart only


hc_exprs_heart <- hc_exprs[,human_chimp_heart]
rownames(hc_exprs_heart) <- exprs[,1]

methyl <- exprs_methyl[,human_chimp_heart]
rownames(methyl) <- exprs[,1]


Y <- hc_exprs_heart
two_species <- samples$Species[human_chimp_heart]
X <- droplevels.factor(two_species)
M <- methyl
RIN_subset <- RIN[human_chimp_heart]



check_values <- mediate.test.regressing(Y, X, M, RIN_subset)
fit <- vash(check_values$d_se,df=6, singlecomp = T, unimodal = "auto") 
fit_ash_normal <- ash(as.vector(check_values$d), fit$sd.post, mode = 0, mixcompdist = "normal")
summary(fit_ash_normal$result$svalue < 0.05)
##    Mode   FALSE    TRUE 
## logical    4073      82
# Run the linear model in limma
  design_1 <- model.matrix(~ X + RIN_subset)
  fit_all <- lmFit(cpm.voom.cyclic[,human_chimp_heart], design_1)
  fit_all <- eBayes(fit_all)

# Get results
  HvC_Heart_fit_all = topTable(fit_all, coef=2, adjust="BH", number=Inf, sort.by="none")
  HvC_Heart_fit_all_5perc <-
HvC_Heart_fit_all[which(HvC_Heart_fit_all$adj.P.Val < FDR_level), ]
  
 human_chimp_heart1 <-  rownames(methyl) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(methyl, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:8]
M <- counts_human_chimp_heart_subset

human_chimp_heart1 <-  rownames(hc_exprs_heart) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(hc_exprs_heart, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:8]
Y <- counts_human_chimp_heart_subset

check_values <- mediate.test.regressing(Y, X, M, RIN_subset)

fit <- vash(check_values$d_se,df=6, singlecomp = T, unimodal = "auto") 
fit_ash <- ash(as.vector(check_values$d), fit$sd.post, mode = 0, mixcompdist = "normal")

calc <- as.array(summary(fit_ash$result$svalue < FDR_level))
calc2 <- as.numeric(calc[[2]])

(nrow(Y)-calc2)/nrow(Y)
## [1] 0.3333333
ind_sig <- (fit_ash$result$svalue < 0.05)
ind_sig[ind_sig == TRUE] <- "red"
ind_sig[ind_sig == FALSE] <- "black"

combine_data <- as.data.frame(cbind(fit_ash$result$betahat, fit_ash$result$sebetahat, ind_sig), stringsAsFactors = FALSE)

combine_data[,1] <- as.numeric(combine_data[,1])
combine_data[,2] <- as.numeric(combine_data[,2])
colnames(combine_data) <- c("Effect", "SE", "ind_sigc")

hr_dec <- ggplot(combine_data, aes(combine_data$Effect, combine_data$SE)) + geom_point(color = c(combine_data$ind_sigc)) + ylab("Standard error") + xlab("Effect size difference") + ylim(0, 6.5) + xlim(-8, 8)


# Revisions- subset to significant values and look at the sign of the effect size

subset_red <- combine_data[which(combine_data$ind_sigc == "red"), ]
summary(subset_red$Effect > 0)
##    Mode   FALSE    TRUE 
## logical      65      40

Human-rhesus liver

# Check parameters

human_chimp_heart <- c(34, 38, 42, 46, 18, 22, 26, 30)

# Methylation
hc_exprs <- exprs[,2:48]
exprs_methyl <- exprs[,49:95]

# HC heart only


hc_exprs_heart <- hc_exprs[,human_chimp_heart]
rownames(hc_exprs_heart) <- exprs[,1]

methyl <- exprs_methyl[,human_chimp_heart]
rownames(methyl) <- exprs[,1]


Y <- hc_exprs_heart
two_species <- samples$Species[human_chimp_heart]
X <- droplevels.factor(two_species)
M <- methyl
RIN_subset <- RIN[human_chimp_heart]



check_values <- mediate.test.regressing(Y, X, M, RIN_subset)
fit <- vash(check_values$d_se,df=6, singlecomp = T, unimodal = "auto") 
fit_ash_normal <- ash(as.vector(check_values$d), fit$sd.post, mode = 0, mixcompdist = "normal")
summary(fit_ash_normal$result$svalue < 0.05)
##    Mode   FALSE    TRUE 
## logical    4057      98
# Run the linear model in limma
  design_1 <- model.matrix(~ X + RIN_subset)
  fit_all <- lmFit(cpm.voom.cyclic[,human_chimp_heart], design_1)
  fit_all <- eBayes(fit_all)

# Get results
  HvC_Heart_fit_all = topTable(fit_all, coef=2, adjust="BH", number=Inf, sort.by="none")
  HvC_Heart_fit_all_5perc <-
HvC_Heart_fit_all[which(HvC_Heart_fit_all$adj.P.Val < FDR_level), ]
  
 human_chimp_heart1 <-  rownames(methyl) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(methyl, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:8]
M <- counts_human_chimp_heart_subset

human_chimp_heart1 <-  rownames(hc_exprs_heart) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(hc_exprs_heart, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:8]
Y <- counts_human_chimp_heart_subset

check_values <- mediate.test.regressing(Y, X, M, RIN_subset)

fit <- vash(check_values$d_se,df=6, singlecomp = T, unimodal = "auto") 
fit_ash <- ash(as.vector(check_values$d), fit$sd.post, mode = 0, mixcompdist = "normal")

calc <- as.array(summary(fit_ash$result$svalue < FDR_level))
calc2 <- as.numeric(calc[[2]])

(nrow(Y)-calc2)/nrow(Y)
## [1] 0.4021352
ind_sig <- (fit_ash$result$svalue < 0.05)
ind_sig[ind_sig == TRUE] <- "red"
ind_sig[ind_sig == FALSE] <- "black"

combine_data <- as.data.frame(cbind(fit_ash$result$betahat, fit_ash$result$sebetahat, ind_sig), stringsAsFactors = FALSE)

combine_data[,1] <- as.numeric(combine_data[,1])
combine_data[,2] <- as.numeric(combine_data[,2])
colnames(combine_data) <- c("Effect", "SE", "ind_sigc")

hr_dec <- ggplot(combine_data, aes(combine_data$Effect, combine_data$SE)) + geom_point(color = c(combine_data$ind_sigc)) + ylab("Standard error") + xlab("Effect size difference") + ylim(0, 6.5) + xlim(-8, 8)


# Revisions- subset to significant values and look at the sign of the effect size

subset_red <- combine_data[which(combine_data$ind_sigc == "red"), ]
summary(subset_red$Effect > 0)
##    Mode   FALSE    TRUE 
## logical      72      41

Human-rhesus liver

# Check parameters

human_chimp_heart <- c(35, 39, 43, 47, 19, 23, 26, 31)

# Methylation
hc_exprs <- exprs[,2:48]
exprs_methyl <- exprs[,49:95]

# HC heart only


hc_exprs_heart <- hc_exprs[,human_chimp_heart]
rownames(hc_exprs_heart) <- exprs[,1]

methyl <- exprs_methyl[,human_chimp_heart]
rownames(methyl) <- exprs[,1]


Y <- hc_exprs_heart
two_species <- samples$Species[human_chimp_heart]
X <- droplevels.factor(two_species)
M <- methyl
RIN_subset <- RIN[human_chimp_heart]



check_values <- mediate.test.regressing(Y, X, M, RIN_subset)
fit <- vash(check_values$d_se,df=6, singlecomp = T, unimodal = "auto") 
fit_ash_normal <- ash(as.vector(check_values$d), fit$sd.post, mode = 0, mixcompdist = "normal")
summary(fit_ash_normal$result$svalue < 0.05)
##    Mode   FALSE    TRUE 
## logical    4024     131
# Run the linear model in limma
  design_1 <- model.matrix(~ X + RIN_subset)
  fit_all <- lmFit(cpm.voom.cyclic[,human_chimp_heart], design_1)
  fit_all <- eBayes(fit_all)

# Get results
  HvC_Heart_fit_all = topTable(fit_all, coef=2, adjust="BH", number=Inf, sort.by="none")
  HvC_Heart_fit_all_5perc <-
HvC_Heart_fit_all[which(HvC_Heart_fit_all$adj.P.Val < FDR_level), ]
  
 human_chimp_heart1 <-  rownames(methyl) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(methyl, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:8]
M <- counts_human_chimp_heart_subset

human_chimp_heart1 <-  rownames(hc_exprs_heart) %in% rownames( HvC_Heart_fit_all_5perc)
human_chimp_heart1 <- as.data.frame(human_chimp_heart1)
counts_genes_in <- cbind(hc_exprs_heart, human_chimp_heart1)
counts_genes_in_cutoff <- subset(counts_genes_in, human_chimp_heart1 == "TRUE")
counts_human_chimp_heart_subset <- counts_genes_in_cutoff[,1:8]
Y <- counts_human_chimp_heart_subset

check_values <- mediate.test.regressing(Y, X, M, RIN_subset)

fit <- vash(check_values$d_se,df=6, singlecomp = T, unimodal = "auto") 
fit_ash <- ash(as.vector(check_values$d), fit$sd.post, mode = 0, mixcompdist = "normal")

calc <- as.array(summary(fit_ash$result$svalue < FDR_level))
calc2 <- as.numeric(calc[[2]])

(nrow(Y)-calc2)/nrow(Y)
## [1] 0.09192982
ind_sig <- (fit_ash$result$svalue < 0.05)
ind_sig[ind_sig == TRUE] <- "red"
ind_sig[ind_sig == FALSE] <- "black"

combine_data <- as.data.frame(cbind(fit_ash$result$betahat, fit_ash$result$sebetahat, ind_sig), stringsAsFactors = FALSE)

combine_data[,1] <- as.numeric(combine_data[,1])
combine_data[,2] <- as.numeric(combine_data[,2])
colnames(combine_data) <- c("Effect", "SE", "ind_sigc")

hr_dec <- ggplot(combine_data, aes(combine_data$Effect, combine_data$SE)) + geom_point(color = c(combine_data$ind_sigc)) + ylab("Standard error") + xlab("Effect size difference") + ylim(0, 6.5) + xlim(-8, 8)


# Revisions- subset to significant values and look at the sign of the effect size

subset_red <- combine_data[which(combine_data$ind_sigc == "red"), ]
summary(subset_red$Effect > 0)
##    Mode   FALSE    TRUE 
## logical      69      62