Open files

Note: because of the pairwise ordering inherited from another file (tissue_specific_tDMRs), tDMRs labeled “hypermethylated” are actually hypomethylated in the lung and kidney.

# Library
library(bedr)
## Warning: package 'bedr' was built under R version 3.4.4
## 
## 
## ######################
## #### bedr v1.0.7 ####
## ######################
## 
## checking binary availability...
##   * Checking path for bedtools... PASS
##     /usr/local/bin/bedtools
##   * Checking path for bedops... FAIL
##   * Checking path for tabix... FAIL
## tests and examples will be skipped on R CMD check if binaries are missing
library("topGO")
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, cbind, colMeans,
##     colnames, colSums, do.call, duplicated, eval, evalq, Filter,
##     Find, get, grep, grepl, intersect, is.unsorted, lapply,
##     lengths, Map, mapply, match, mget, order, paste, pmax,
##     pmax.int, pmin, pmin.int, Position, rank, rbind, Reduce,
##     rowMeans, rownames, rowSums, sapply, setdiff, sort, table,
##     tapply, union, unique, unsplit, which, which.max, which.min
## Loading required package: graph
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: GO.db
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: IRanges
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
## 
##     expand.grid
## 
## Loading required package: SparseM
## 
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
## 
##     backsolve
## 
## groupGOTerms:    GOBPTerm, GOMFTerm, GOCCTerm environments built.
## 
## Attaching package: 'topGO'
## The following object is masked from 'package:IRanges':
## 
##     members
#library("biomaRt")
library("clusterProfiler")
## Loading required package: DOSE
## DOSE v3.4.0  For help: https://guangchuangyu.github.io/DOSE
## 
## If you use DOSE in published research, please cite:
## Guangchuang Yu, Li-Gen Wang, Guang-Rong Yan, Qing-Yu He. DOSE: an R/Bioconductor package for Disease Ontology Semantic and Enrichment analysis. Bioinformatics 2015, 31(4):608-609
## clusterProfiler v3.6.0  For help: https://guangchuangyu.github.io/clusterProfiler
## 
## If you use clusterProfiler in published research, please cite:
## Guangchuang Yu., Li-Gen Wang, Yanyan Han, Qing-Yu He. clusterProfiler: an R package for comparing biological themes among gene clusters. OMICS: A Journal of Integrative Biology. 2012, 16(5):284-287.
library("org.Hs.eg.db")
## 
# Load the heart-specific regions file. It has no header row, so the columns
# are named V1, V2, ... by read.delim().
human_chimp_heart_specific_25_conserved_H3K27_HEART <- read.delim(
  "~/Desktop/Regulatory_Evol/ashlar-trial/data/overlap_reg/overlap_25/human_chimp_heart_specific_25_conserved_H3K27_HEART.bed",
  header = FALSE,
  stringsAsFactors = FALSE
)

# V4 is a character column, so summary() reports only its length (row count).
summary(human_chimp_heart_specific_25_conserved_H3K27_HEART[["V4"]])
##    Length     Class      Mode 
##       455 character character
# Hard-coded proportion; the two counts add up to the 455 rows reported above
# (presumably the counts of the two V4 labels -- TODO confirm which is which).
413 / (413 + 42)
## [1] 0.9076923
# Load the kidney-specific regions file (no header row; columns become V1, ...).
# Note: the object name contains "heart_specific" but the file read here is the
# kidney one.
human_chimp_heart_specific_25_conserved_H3K27_KIDNEY <- read.delim(
  "~/Desktop/Regulatory_Evol/ashlar-trial/data/overlap_reg/overlap_25/human_chimp_kidney_specific_25_conserved_H3K27_KIDNEY.bed",
  header = FALSE,
  stringsAsFactors = FALSE
)

# V4 is a character column, so summary() reports only its length (row count).
summary(human_chimp_heart_specific_25_conserved_H3K27_KIDNEY[["V4"]])
##    Length     Class      Mode 
##       897 character character
# Hard-coded proportion; the two counts add up to the 897 rows reported above
# (presumably the counts of the two V4 labels -- TODO confirm which is which).
858 / (858 + 39)
## [1] 0.9565217
# Load the liver-specific regions file (no header row; columns become V1, ...).
# Note: the object name contains "heart_specific" but the file read here is the
# liver one.
human_chimp_heart_specific_25_conserved_H3K27_LIVER <- read.delim(
  "~/Desktop/Regulatory_Evol/ashlar-trial/data/overlap_reg/overlap_25/human_chimp_liver_specific_25_conserved_H3K27_LIVER.bed",
  header = FALSE,
  stringsAsFactors = FALSE
)

# V4 is a character column, so summary() reports only its length (row count).
summary(human_chimp_heart_specific_25_conserved_H3K27_LIVER[["V4"]])
##    Length     Class      Mode 
##      1333 character character
# Hard-coded proportion; the two counts add up to the 1333 rows reported above
# (presumably the counts of the two V4 labels -- TODO confirm which is which).
1081 / (1081 + 252)
## [1] 0.8109527
# Load the lung-specific regions file (no header row; columns become V1, ...).
# Note: the object name contains "heart_specific" but the file read here is the
# lung one.
human_chimp_heart_specific_25_conserved_H3K27_LUNG <- read.delim(
  "~/Desktop/Regulatory_Evol/ashlar-trial/data/overlap_reg/overlap_25/human_chimp_lung_specific_25_conserved_H3K27_LUNG.bed",
  header = FALSE,
  stringsAsFactors = FALSE
)

# V4 is a character column, so summary() reports only its length (row count).
summary(human_chimp_heart_specific_25_conserved_H3K27_LUNG[["V4"]])
##    Length     Class      Mode 
##        51 character character
# Hard-coded proportion; the two counts add up to the 51 rows reported above
# (presumably the counts of the two V4 labels -- TODO confirm which is which).
30 / (30 + 21)
## [1] 0.5882353
# Load the refGene hg19 TSS table (tab-delimited, no header row). The file
# carries an ".R" extension but is read here as delimited text.
refGene_hg19_TSS <- read.delim(
  "~/Desktop/Regulatory_Evol/ashlar-trial/data/overlap_reg/overlap_25/refGene_hg19_TSS.R",
  header = FALSE,
  stringsAsFactors = FALSE
)

# Shift the third column (interval end) by one so that bedtools will accept the
# intervals (presumably start == end in the raw file -- TODO confirm).
refGene_hg19_TSS[[3]] <- refGene_hg19_TSS[[3]] + 1

# Sort the heart regions (first three columns only) with bedr.

human_chimp_heart_cons <- bedr.sort.region(
  human_chimp_heart_specific_25_conserved_H3K27_HEART[, 1:3]
)
## SORTING
## VALIDATE REGIONS
##  * Checking input type... PASS
##    Input seems to be in bed format but chr/start/end column names are missing
##  * Check if index is a string... PASS
##  * Check index pattern... PASS
##  * Check for missing values... PASS
##  * Check for larger start position... PASS.
##  * Check if zero based... PASS
##  * Overlapping regions can cause unexpected results.
# Sort the TSS table in place (the sorted result overwrites the original).
refGene_hg19_TSS <- bedr.sort.region(
  refGene_hg19_TSS
)
## SORTING
## VALIDATE REGIONS
##  * Checking input type... PASS
##    Input seems to be in bed format but chr/start/end column names are missing
##  * Check if index is a string... PASS
##  * Check index pattern... PASS
##  * Check for missing values... PASS
##  * Check for larger start position... PASS.
##  * Check if zero based... PASS
# Find the closest refGene TSS for every heart region via `bedtools closest`.
# Input `a` is the sorted region set produced by bedr.sort.region() above
# (`human_chimp_heart_cons`), matching how the kidney, liver and lung sections
# call bedr(); previously the raw, unsorted table was passed and the sorted
# object was never used.
# NOTE: the console output recorded below this call was produced before this
# change; re-knit to refresh it.
closest_heart <- bedr(
  input = list(a = human_chimp_heart_cons, b = refGene_hg19_TSS),
  method = "closest",
  check.chr = FALSE
)
##  * Processing input (1): a
## CONVERT TO BED
##  * Checking input type... PASS
##    Input seems to be in bed format but chr/start/end column names are missing
## VALIDATE REGIONS
##  * Check if index is a string... PASS
##  * Check index pattern... PASS
##  * Check for missing values... PASS
##  * Check for larger start position... PASS.
##  * Check if zero based... PASS
##  * Checking sort order... PASS
##  * Checking for overlapping 'contiguous' regions... PASS
##  * Processing input (2): b
## CONVERT TO BED
##  * Checking input type... PASS
##    Input seems to be in bed format but chr/start/end column names are missing
## VALIDATE REGIONS
##  * Check if index is a string... PASS
##  * Check index pattern... PASS
##  * Check for missing values... PASS
##  * Check for larger start position... PASS.
##  * Check if zero based... PASS
##  * Checking sort order... FAIL
##    The input for object is not *lexographically* ordered!
##    This can cause unexpected results for some set operations.
##    try: x <- bedr.sort.region(x)
##  * Checking for overlapping 'contiguous' regions... FAIL
##    The input for object has overlapping features!
##    This can cause unexpected results for some set operations.
##    i.e. x <- bedr.merge.region(x)
##    bedtools closest -a /var/folders/rf/qrcw6ncj05z1pc_pq9xzw3540000gn/T//RtmpXK3I56/a_bc557225e1f3.bed -b /var/folders/rf/qrcw6ncj05z1pc_pq9xzw3540000gn/T//RtmpXK3I56/b_bc5530e100bf.bed
# Split the closest-TSS hits into "upstream"/"downstream" groups by the value
# of column V9 ("+" vs "-"; presumably the strand of the nearest gene --
# TODO confirm).

closest_heart_upstream <- closest_heart[which(closest_heart[["V9"]] == "+"), ]
closest_heart_downstream <- closest_heart[which(closest_heart[["V9"]] == "-"), ]

# Signed offset between column V3 and column V5; the subtraction order is
# reversed for the "-" group.
distance_heart_upstream <-
  as.numeric(closest_heart_upstream[["V3"]]) -
  as.numeric(closest_heart_upstream[["V5"]])
distance_heart_downstream <-
  as.numeric(closest_heart_downstream[["V5"]]) -
  as.numeric(closest_heart_downstream[["V3"]])

# Fraction of hits in each group with a strictly positive offset.
sum(distance_heart_upstream > 0, na.rm = TRUE) / length(distance_heart_upstream)
## [1] 0.6037152
sum(distance_heart_downstream > 0, na.rm = TRUE) / length(distance_heart_downstream)
## [1] 0.6140351
summary(distance_heart_downstream)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -751049  -12592    3472   15480   50862  386993

Kidney

# Sort the kidney regions (first three columns only) with bedr.
# Note: the result reuses the name `human_chimp_heart_cons` although it now
# holds kidney data.

human_chimp_heart_cons <- bedr.sort.region(
  human_chimp_heart_specific_25_conserved_H3K27_KIDNEY[, 1:3]
)
## SORTING
## VALIDATE REGIONS
##  * Checking input type... PASS
##    Input seems to be in bed format but chr/start/end column names are missing
##  * Check if index is a string... PASS
##  * Check index pattern... PASS
##  * Check for missing values... PASS
##  * Check for larger start position... PASS.
##  * Check if zero based... PASS
# Re-sort the TSS table in place (the sorted result overwrites the original).
refGene_hg19_TSS <- bedr.sort.region(
  refGene_hg19_TSS
)
## SORTING
## VALIDATE REGIONS
##  * Check if index is a string... PASS
##  * Check index pattern... PASS
##  * Check for missing values... PASS
##  * Check for larger start position... PASS.
##  * Check if zero based... PASS
# Closest refGene TSS for each sorted region via `bedtools closest`.
# Note: the result overwrites `closest_heart`; in this section the input
# regions are the kidney ones.
closest_heart <- bedr(
  input = list(a = human_chimp_heart_cons, b = refGene_hg19_TSS),
  method = "closest",
  check.chr = FALSE
)
##  * Processing input (1): a
## CONVERT TO BED
##  * Checking input type... PASS
##    Input seems to be in bed format but chr/start/end column names are missing
## VALIDATE REGIONS
##  * Check if index is a string... PASS
##  * Check index pattern... PASS
##  * Check for missing values... PASS
##  * Check for larger start position... PASS.
##  * Check if zero based... PASS
##  * Checking sort order... PASS
##  * Checking for overlapping 'contiguous' regions... FAIL
##    The input for object has overlapping features!
##    This can cause unexpected results for some set operations.
##    i.e. x <- bedr.merge.region(x)
##  * Processing input (2): b
## CONVERT TO BED
##  * Checking input type... PASS
##    Input seems to be in bed format but chr/start/end column names are missing
## VALIDATE REGIONS
##  * Check if index is a string... PASS
##  * Check index pattern... PASS
##  * Check for missing values... PASS
##  * Check for larger start position... PASS.
##  * Check if zero based... PASS
##  * Checking sort order... FAIL
##    The input for object is not *lexographically* ordered!
##    This can cause unexpected results for some set operations.
##    try: x <- bedr.sort.region(x)
##  * Checking for overlapping 'contiguous' regions... FAIL
##    The input for object has overlapping features!
##    This can cause unexpected results for some set operations.
##    i.e. x <- bedr.merge.region(x)
##    bedtools closest -a /var/folders/rf/qrcw6ncj05z1pc_pq9xzw3540000gn/T//RtmpXK3I56/a_bc553d991179.bed -b /var/folders/rf/qrcw6ncj05z1pc_pq9xzw3540000gn/T//RtmpXK3I56/b_bc551a874cb1.bed
# Split the closest-TSS hits into "upstream"/"downstream" groups by the value
# of column V9 ("+" vs "-"; presumably the strand of the nearest gene --
# TODO confirm).
# Note: the `*_heart_*` names are reused here; `closest_heart` holds the result
# of the most recent bedr() call (the kidney regions in this section).

closest_heart_upstream <- closest_heart[which(closest_heart[["V9"]] == "+"), ]
closest_heart_downstream <- closest_heart[which(closest_heart[["V9"]] == "-"), ]

# Signed offset between column V3 and column V5; the subtraction order is
# reversed for the "-" group.
distance_heart_upstream <-
  as.numeric(closest_heart_upstream[["V3"]]) -
  as.numeric(closest_heart_upstream[["V5"]])
distance_heart_downstream <-
  as.numeric(closest_heart_downstream[["V5"]]) -
  as.numeric(closest_heart_downstream[["V3"]])

# Fraction of hits in each group with a strictly positive offset.
sum(distance_heart_upstream > 0, na.rm = TRUE) / length(distance_heart_upstream)
## [1] 0.5685358
sum(distance_heart_downstream > 0, na.rm = TRUE) / length(distance_heart_downstream)
## [1] 0.5628227

Liver

# Sort the liver regions (first three columns only) with bedr.
# Note: the result reuses the name `human_chimp_heart_cons` although it now
# holds liver data.

human_chimp_heart_cons <- bedr.sort.region(
  human_chimp_heart_specific_25_conserved_H3K27_LIVER[, 1:3]
)
## SORTING
## VALIDATE REGIONS
##  * Checking input type... PASS
##    Input seems to be in bed format but chr/start/end column names are missing
##  * Check if index is a string... PASS
##  * Check index pattern... PASS
##  * Check for missing values... PASS
##  * Check for larger start position... PASS.
##  * Check if zero based... PASS
# Re-sort the TSS table in place (the sorted result overwrites the original).
refGene_hg19_TSS <- bedr.sort.region(
  refGene_hg19_TSS
)
## SORTING
## VALIDATE REGIONS
##  * Check if index is a string... PASS
##  * Check index pattern... PASS
##  * Check for missing values... PASS
##  * Check for larger start position... PASS.
##  * Check if zero based... PASS
# Closest refGene TSS for each sorted region via `bedtools closest`.
# Note: the result overwrites `closest_heart`; in this section the input
# regions are the liver ones.
closest_heart <- bedr(
  input = list(a = human_chimp_heart_cons, b = refGene_hg19_TSS),
  method = "closest",
  check.chr = FALSE
)
##  * Processing input (1): a
## CONVERT TO BED
##  * Checking input type... PASS
##    Input seems to be in bed format but chr/start/end column names are missing
## VALIDATE REGIONS
##  * Check if index is a string... PASS
##  * Check index pattern... PASS
##  * Check for missing values... PASS
##  * Check for larger start position... PASS.
##  * Check if zero based... PASS
##  * Checking sort order... PASS
##  * Checking for overlapping 'contiguous' regions... FAIL
##    The input for object has overlapping features!
##    This can cause unexpected results for some set operations.
##    i.e. x <- bedr.merge.region(x)
##  * Processing input (2): b
## CONVERT TO BED
##  * Checking input type... PASS
##    Input seems to be in bed format but chr/start/end column names are missing
## VALIDATE REGIONS
##  * Check if index is a string... PASS
##  * Check index pattern... PASS
##  * Check for missing values... PASS
##  * Check for larger start position... PASS.
##  * Check if zero based... PASS
##  * Checking sort order... FAIL
##    The input for object is not *lexographically* ordered!
##    This can cause unexpected results for some set operations.
##    try: x <- bedr.sort.region(x)
##  * Checking for overlapping 'contiguous' regions... FAIL
##    The input for object has overlapping features!
##    This can cause unexpected results for some set operations.
##    i.e. x <- bedr.merge.region(x)
##    bedtools closest -a /var/folders/rf/qrcw6ncj05z1pc_pq9xzw3540000gn/T//RtmpXK3I56/a_bc551ef7a8be.bed -b /var/folders/rf/qrcw6ncj05z1pc_pq9xzw3540000gn/T//RtmpXK3I56/b_bc551c85df81.bed
# Split the closest-TSS hits into "upstream"/"downstream" groups by the value
# of column V9 ("+" vs "-"; presumably the strand of the nearest gene --
# TODO confirm). `closest_heart` holds the result of the most recent bedr()
# call (the liver regions in this section).

closest_liver_upstream <- closest_heart[which(closest_heart[["V9"]] == "+"), ]
closest_liver_downstream <- closest_heart[which(closest_heart[["V9"]] == "-"), ]

# Signed offset between column V3 and column V5; the subtraction order is
# reversed for the "-" group.
distance_liver_upstream <-
  as.numeric(closest_liver_upstream[["V3"]]) -
  as.numeric(closest_liver_upstream[["V5"]])
distance_liver_downstream <-
  as.numeric(closest_liver_downstream[["V5"]]) -
  as.numeric(closest_liver_downstream[["V3"]])

# Fraction of hits in each group with a strictly positive offset.
sum(distance_liver_upstream > 0, na.rm = TRUE) / length(distance_liver_upstream)
## [1] 0.6512043
sum(distance_liver_downstream > 0, na.rm = TRUE) / length(distance_liver_downstream)
## [1] 0.6230032
# Pool the offsets for an overall summary.
# NOTE(review): `distance_heart_upstream`/`distance_heart_downstream` are not
# recomputed in this block; they hold whatever was last assigned to those names
# (the Kidney section, as this file is ordered), so this vector pools kidney
# and liver values only -- confirm that this is intended.
all_distance <- c(
  distance_heart_upstream,
  distance_heart_downstream,
  distance_liver_upstream,
  distance_liver_downstream
)
summary(all_distance)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -805181   -7548    1690   13079   31168  999526

Lung

# Sort the lung regions (first three columns only) with bedr.
# Note: the result reuses the name `human_chimp_heart_cons` although it now
# holds lung data.

human_chimp_heart_cons <- bedr.sort.region(
  human_chimp_heart_specific_25_conserved_H3K27_LUNG[, 1:3]
)
## SORTING
## VALIDATE REGIONS
##  * Checking input type... PASS
##    Input seems to be in bed format but chr/start/end column names are missing
##  * Check if index is a string... PASS
##  * Check index pattern... PASS
##  * Check for missing values... PASS
##  * Check for larger start position... PASS.
##  * Check if zero based... PASS
##  * Overlapping regions can cause unexpected results.
# Re-sort the TSS table in place (the sorted result overwrites the original).
refGene_hg19_TSS <- bedr.sort.region(
  refGene_hg19_TSS
)
## SORTING
## VALIDATE REGIONS
##  * Check if index is a string... PASS
##  * Check index pattern... PASS
##  * Check for missing values... PASS
##  * Check for larger start position... PASS.
##  * Check if zero based... PASS
# Closest refGene TSS for each sorted region via `bedtools closest`.
# Note: the result overwrites `closest_heart`; in this section the input
# regions are the lung ones.
closest_heart <- bedr(
  input = list(a = human_chimp_heart_cons, b = refGene_hg19_TSS),
  method = "closest",
  check.chr = FALSE
)
##  * Processing input (1): a
## CONVERT TO BED
##  * Checking input type... PASS
##    Input seems to be in bed format but chr/start/end column names are missing
## VALIDATE REGIONS
##  * Check if index is a string... PASS
##  * Check index pattern... PASS
##  * Check for missing values... PASS
##  * Check for larger start position... PASS.
##  * Check if zero based... PASS
##  * Checking sort order... PASS
##  * Checking for overlapping 'contiguous' regions... PASS
##  * Processing input (2): b
## CONVERT TO BED
##  * Checking input type... PASS
##    Input seems to be in bed format but chr/start/end column names are missing
## VALIDATE REGIONS
##  * Check if index is a string... PASS
##  * Check index pattern... PASS
##  * Check for missing values... PASS
##  * Check for larger start position... PASS.
##  * Check if zero based... PASS
##  * Checking sort order... FAIL
##    The input for object is not *lexographically* ordered!
##    This can cause unexpected results for some set operations.
##    try: x <- bedr.sort.region(x)
##  * Checking for overlapping 'contiguous' regions... FAIL
##    The input for object has overlapping features!
##    This can cause unexpected results for some set operations.
##    i.e. x <- bedr.merge.region(x)
##    bedtools closest -a /var/folders/rf/qrcw6ncj05z1pc_pq9xzw3540000gn/T//RtmpXK3I56/a_bc55282f46b3.bed -b /var/folders/rf/qrcw6ncj05z1pc_pq9xzw3540000gn/T//RtmpXK3I56/b_bc551be5509c.bed
# Split the closest-TSS hits into "upstream"/"downstream" groups by the value
# of column V9 ("+" vs "-"; presumably the strand of the nearest gene --
# TODO confirm).
# Note: the `*_heart_*` names are reused here; `closest_heart` holds the result
# of the most recent bedr() call (the lung regions in this section).

closest_heart_upstream <- closest_heart[which(closest_heart[["V9"]] == "+"), ]
closest_heart_downstream <- closest_heart[which(closest_heart[["V9"]] == "-"), ]

# Signed offset between column V3 and column V5; the subtraction order is
# reversed for the "-" group.
distance_heart_upstream <-
  as.numeric(closest_heart_upstream[["V3"]]) -
  as.numeric(closest_heart_upstream[["V5"]])
distance_heart_downstream <-
  as.numeric(closest_heart_downstream[["V5"]]) -
  as.numeric(closest_heart_downstream[["V3"]])

# Fraction of hits in each group with a strictly positive offset.
sum(distance_heart_upstream > 0, na.rm = TRUE) / length(distance_heart_upstream)
## [1] 0.673913
sum(distance_heart_downstream > 0, na.rm = TRUE) / length(distance_heart_downstream)
## [1] 0.5714286

Separate into hypermethylated and overlapping H3K27ac

Heart

# Select the heart regions whose V4 label is "hyper" and whose V6 is not "-1"
# (presumably "-1" marks regions without an H3K27ac overlap -- TODO confirm).
#
# The row mask is built from the same HEART table that is being subset.
# Previously the mask came from the LIVER table (a different file with a
# different number of rows), so it picked unrelated heart rows and produced
# all-NA rows for indices past the end of the heart table.
#
# NOTE(review): the object is called `heart_hypo` but the filter keeps rows
# labeled "hyper"; confirm the intended label for heart.
# NOTE: console output recorded further down was produced before this fix;
# re-knit to refresh it.
heart_hypo <- human_chimp_heart_specific_25_conserved_H3K27_HEART[
  which(
    human_chimp_heart_specific_25_conserved_H3K27_HEART$V4 == "hyper" &
      human_chimp_heart_specific_25_conserved_H3K27_HEART$V6 != "-1"
  ),
]

# Defensive: drop any remaining rows that contain missing values.
heart_hypo <- heart_hypo[complete.cases(heart_hypo), ]

# Closest refGene TSS for each selected heart region (first three columns
# only) via `bedtools closest`.

closest_heart <- bedr(
  input = list(a = heart_hypo[, 1:3], b = refGene_hg19_TSS),
  method = "closest",
  check.chr = FALSE
)
##  * Processing input (1): a
## CONVERT TO BED
##  * Checking input type... PASS
##    Input seems to be in bed format but chr/start/end column names are missing
## VALIDATE REGIONS
##  * Check if index is a string... PASS
##  * Check index pattern... PASS
##  * Check for missing values... PASS
##  * Check for larger start position... PASS.
##  * Check if zero based... PASS
##  * Checking sort order... PASS
##  * Checking for overlapping 'contiguous' regions... PASS
##  * Processing input (2): b
## CONVERT TO BED
##  * Checking input type... PASS
##    Input seems to be in bed format but chr/start/end column names are missing
## VALIDATE REGIONS
##  * Check if index is a string... PASS
##  * Check index pattern... PASS
##  * Check for missing values... PASS
##  * Check for larger start position... PASS.
##  * Check if zero based... PASS
##  * Checking sort order... FAIL
##    The input for object is not *lexographically* ordered!
##    This can cause unexpected results for some set operations.
##    try: x <- bedr.sort.region(x)
##  * Checking for overlapping 'contiguous' regions... FAIL
##    The input for object has overlapping features!
##    This can cause unexpected results for some set operations.
##    i.e. x <- bedr.merge.region(x)
##    bedtools closest -a /var/folders/rf/qrcw6ncj05z1pc_pq9xzw3540000gn/T//RtmpXK3I56/a_bc55367de709.bed -b /var/folders/rf/qrcw6ncj05z1pc_pq9xzw3540000gn/T//RtmpXK3I56/b_bc55788e817d.bed
# Map gene names to Ensembl gene IDs using a comma-separated lookup table
# with a header row.

gene_id <- read.table(
  "../../../Reg_Evo_Primates/data/ENSG_GENE_HG19.csv",
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE
)

# Join the closest-TSS hits (column V8) to the lookup table (column "Gene").
# Note: the result is named `comb_kidney` although `closest_heart` is the input.
comb_kidney <- merge(x = closest_heart, y = gene_id, by.x = "V8", by.y = "Gene")

# Print the matched Ensembl IDs.
comb_kidney[["ensg"]]
##  [1] "ENSG00000196839" "ENSG00000163050" "ENSG00000178878"
##  [4] "ENSG00000130762" "ENSG00000168646" "ENSG00000110092"
##  [7] "ENSG00000106554" "ENSG00000134871" "ENSG00000244274"
## [10] "ENSG00000244274" "ENSG00000135636" "ENSG00000135636"
## [13] "ENSG00000135636" "ENSG00000135636" "ENSG00000135636"
## [16] "ENSG00000135636" "ENSG00000135636" "ENSG00000108001"
## [19] "ENSG00000187773" "ENSG00000139132" "ENSG00000153303"
## [22] "ENSG00000143641" "ENSG00000144596" "ENSG00000115756"
## [25] "ENSG00000197081" "ENSG00000185507" "ENSG00000123104"
## [28] "ENSG00000127528" "ENSG00000106003" "ENSG00000107902"
## [31] "ENSG00000107902" "ENSG00000183060" "ENSG00000068305"
## [34] "ENSG00000068305" "ENSG00000068305" "ENSG00000068305"
## [37] "ENSG00000120254" "ENSG00000120254" "ENSG00000120254"
## [40] "ENSG00000133454" "ENSG00000124785" "ENSG00000162407"
## [43] "ENSG00000107263" "ENSG00000159788" "ENSG00000138835"
## [46] "ENSG00000064932" "ENSG00000104969" "ENSG00000130147"
## [49] "ENSG00000156222" "ENSG00000156222" "ENSG00000156222"
## [52] "ENSG00000156222" "ENSG00000095637" "ENSG00000095637"
## [55] "ENSG00000185594" "ENSG00000065882" "ENSG00000109927"
## [58] "ENSG00000100726" "ENSG00000185361" "ENSG00000198795"
## [61] "ENSG00000198597" "ENSG00000141579" "ENSG00000070476"
## [64] "ENSG00000070476"
# Build the background gene universe: join the TSS table (column V5) to the
# gene-name lookup (column "Gene") and keep the distinct Ensembl IDs.

ref_gene_hg19_ensg <- merge(
  x = refGene_hg19_TSS,
  y = gene_id,
  by.x = "V5",
  by.y = "Gene"
)

all_ref_gene_hg19_ensg <- unique(ref_gene_hg19_ensg[["ensg"]])

# Logical flag per background gene: TRUE when it appears among the matched
# genes in `comb_kidney`.
heart_ref_gene <- all_ref_gene_hg19_ensg %in% comb_kidney[["ensg"]]

# Revisions- run GO
# Turn the flags into a 0/1 numeric vector named by Ensembl ID (the input
# passed as `allGenes` to topGO below).

test_gene <- setNames(as.numeric(heart_ref_gene), all_ref_gene_hg19_ensg)

# Build the topGO data object for the Biological Process ontology.
# Genes with a score above 0 are treated as the selected set; GO annotations
# come from org.Hs.eg.db keyed by Ensembl IDs, and nodeSize is set to 5.
go_data <- new(
  "topGOdata",
  ontology = "BP",
  allGenes = test_gene,
  geneSel = function(allScore) allScore > 0,
  nodeSize = 5,
  annotationFun = annFUN.org,
  mapping = "org.Hs.eg.db",
  ID = "ensembl"
)
## 
## Building most specific GOs .....
##  ( 11450 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 15456 GO terms and 36153 relations. )
## 
## Annotating nodes ...............
##  ( 14505 genes annotated to the GO terms. )
# Run the enrichment test: Fisher statistic with the "weight01" algorithm.
go_test <- runTest(
  go_data,
  algorithm = "weight01",
  statistic = "fisher"
)
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 1568 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 15:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 14:  11 nodes to be scored   (0 eliminated genes)
## 
##   Level 13:  29 nodes to be scored   (103 eliminated genes)
## 
##   Level 12:  50 nodes to be scored   (298 eliminated genes)
## 
##   Level 11:  72 nodes to be scored   (2501 eliminated genes)
## 
##   Level 10:  107 nodes to be scored  (3937 eliminated genes)
## 
##   Level 9:   161 nodes to be scored  (5541 eliminated genes)
## 
##   Level 8:   201 nodes to be scored  (7251 eliminated genes)
## 
##   Level 7:   245 nodes to be scored  (8558 eliminated genes)
## 
##   Level 6:   265 nodes to be scored  (10615 eliminated genes)
## 
##   Level 5:   217 nodes to be scored  (12035 eliminated genes)
## 
##   Level 4:   127 nodes to be scored  (13480 eliminated genes)
## 
##   Level 3:   63 nodes to be scored   (13985 eliminated genes)
## 
##   Level 2:   18 nodes to be scored   (14177 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (14301 eliminated genes)
# Tabulate the GO terms whose weight01/Fisher score is below 0.05, ordered and
# ranked by that score. The cutoff is applied to the raw scores returned by
# score(go_test).
go_table <- GenTable(
  go_data,
  weightFisher = go_test,
  orderBy = "weightFisher",
  ranksOf = "weightFisher",
  topNodes = sum(score(go_test) < 0.05)
)

# Print the table.
go_table
##          GO.ID                                        Term Annotated
## 1   GO:0009113      purine nucleobase biosynthetic process        12
## 2   GO:0033197                       response to vitamin E        12
## 3   GO:0006351                transcription, DNA-templated      3277
## 4   GO:0032007        negative regulation of TOR signaling        40
## 5   GO:0043547      positive regulation of GTPase activity       354
## 6   GO:0060070             canonical Wnt signaling pathway       274
## 7   GO:1903078 positive regulation of protein localizat...        46
## 8   GO:0071320                   cellular response to cAMP        47
## 9   GO:0006501               C-terminal protein lipidation        62
## 10  GO:0007386           compartment pattern specification         5
## 11  GO:0070375                                ERK5 cascade         5
## 12  GO:0042321 negative regulation of circadian sleep/w...         5
## 13  GO:0009146 purine nucleoside triphosphate catabolic...         5
## 14  GO:0060509           Type I pneumocyte differentiation         5
## 15  GO:0046060                      dATP metabolic process         5
## 16  GO:0009256 10-formyltetrahydrofolate metabolic proc...         5
## 17  GO:0048541                   Peyer's patch development         5
## 18  GO:0009168 purine ribonucleoside monophosphate bios...        66
## 19  GO:0006863                 purine nucleobase transport         6
## 20  GO:0015942                   formate metabolic process         6
## 21  GO:0018242 protein O-linked glycosylation via serin...         6
## 22  GO:0001957                intramembranous ossification         6
## 23  GO:0060368 regulation of Fc receptor mediated stimu...         6
## 24  GO:0070141                            response to UV-A         6
## 25  GO:0038203                             TORC2 signaling         6
## 26  GO:0033632 regulation of cell-cell adhesion mediate...         6
## 27  GO:0060405               regulation of penile erection         6
## 28  GO:0009204 deoxyribonucleoside triphosphate catabol...         6
## 29  GO:0070244 negative regulation of thymocyte apoptot...         6
## 30  GO:0090630               activation of GTPase activity        76
## 31  GO:0002819      regulation of adaptive immune response       127
## 32  GO:0032261                   purine nucleotide salvage         7
## 33  GO:0018243 protein O-linked glycosylation via threo...         7
## 34  GO:0001778                      plasma membrane repair         7
## 35  GO:1903071 positive regulation of ER-associated ubi...         7
## 36  GO:0046654       tetrahydrofolate biosynthetic process         7
## 37  GO:0009120       deoxyribonucleoside metabolic process         7
## 38  GO:0019043              establishment of viral latency         7
## 39  GO:0048702        embryonic neurocranium morphogenesis         7
## 40  GO:1901642          nucleoside transmembrane transport         7
## 41  GO:2000109 regulation of macrophage apoptotic proce...         7
## 42  GO:0046130     purine ribonucleoside catabolic process         7
## 43  GO:0033601 positive regulation of mammary gland epi...         7
## 44  GO:0071499 cellular response to laminar fluid shear...         7
## 45  GO:0008277 regulation of G-protein coupled receptor...       120
## 46  GO:1904263      positive regulation of TORC1 signaling         8
## 47  GO:0038180       nerve growth factor signaling pathway         8
## 48  GO:0038063 collagen-activated tyrosine kinase recep...         8
## 49  GO:0060340 positive regulation of type I interferon...         8
## 50  GO:0072530 purine-containing compound transmembrane...         8
## 51  GO:0048703      embryonic viscerocranium morphogenesis         8
## 52  GO:0034127 regulation of MyD88-independent toll-lik...         8
## 53  GO:0008354                         germ cell migration         8
## 54  GO:0014807                 regulation of somitogenesis         8
## 55  GO:0045188 regulation of circadian sleep/wake cycle...         8
## 56  GO:0048387 negative regulation of retinoic acid rec...         8
## 57  GO:0002313 mature B cell differentiation involved i...        18
## 58  GO:0033089 positive regulation of T cell differenti...         9
## 59  GO:0002315        marginal zone B cell differentiation         9
## 60  GO:0035999            tetrahydrofolate interconversion         9
## 61  GO:0001821                         histamine secretion         9
## 62  GO:0003139         secondary heart field specification         9
## 63  GO:0039530                     MDA-5 signaling pathway         9
## 64  GO:0002634     regulation of germinal center formation         9
## 65  GO:0070255               regulation of mucus secretion         9
## 66  GO:0001889                           liver development       124
## 67  GO:0030282                         bone mineralization        93
## 68  GO:0002903 negative regulation of B cell apoptotic ...        10
## 69  GO:0070601       centromeric sister chromatid cohesion        10
## 70  GO:0046085                 adenosine metabolic process        10
## 71  GO:0042118                 endothelial cell activation        10
## 72  GO:0035414 negative regulation of catenin import in...        10
## 73  GO:0033033 negative regulation of myeloid cell apop...        11
## 74  GO:0001973        adenosine receptor signaling pathway        11
## 75  GO:0003413 chondrocyte differentiation involved in ...        11
## 76  GO:0033327                 Leydig cell differentiation        11
## 77  GO:0043249                      erythrocyte maturation        11
## 78  GO:1904293         negative regulation of ERAD pathway        12
## 79  GO:0045351      type I interferon biosynthetic process        12
## 80  GO:0055015 ventricular cardiac muscle cell developm...        12
## 81  GO:2000059 negative regulation of protein ubiquitin...        12
## 82  GO:0050862 positive regulation of T cell receptor s...        12
## 83  GO:0048311                  mitochondrion distribution        12
## 84  GO:0002281 macrophage activation involved in immune...        12
## 85  GO:0061430                bone trabecula morphogenesis        13
## 86  GO:0043650      dicarboxylic acid biosynthetic process        13
## 87  GO:0071361                cellular response to ethanol        13
## 88  GO:0032486             Rap protein signal transduction        13
## 89  GO:2000107 negative regulation of leukocyte apoptot...        39
## 90  GO:0036066               protein O-linked fucosylation        14
## 91  GO:0045725 positive regulation of glycogen biosynth...        15
## 92  GO:0060749          mammary gland alveolus development        15
## 93  GO:0001829        trophectodermal cell differentiation        15
## 94  GO:0006744             ubiquinone biosynthetic process        15
## 95  GO:0035023 regulation of Rho protein signal transdu...       118
## 96  GO:0051412                  response to corticosterone        16
## 97  GO:0051446 positive regulation of meiotic cell cycl...        16
## 98  GO:0072531 pyrimidine-containing compound transmemb...        16
## 99  GO:0055003                  cardiac myofibril assembly        16
## 100 GO:0009264       deoxyribonucleotide catabolic process        16
## 101 GO:0000122 negative regulation of transcription fro...       727
## 102 GO:0030111         regulation of Wnt signaling pathway       295
## 103 GO:0046386     deoxyribose phosphate catabolic process        17
## 104 GO:0045655      regulation of monocyte differentiation        17
## 105 GO:0071157    negative regulation of cell cycle arrest        18
## 106 GO:0032026                   response to magnesium ion        18
## 107 GO:0046655                folic acid metabolic process        18
## 108 GO:0007063     regulation of sister chromatid cohesion        19
## 109 GO:2000178 negative regulation of neural precursor ...        19
## 110 GO:0006139 nucleobase-containing compound metabolic...      5072
##     Significant Expected weightFisher
## 1             2     0.03      0.00046
## 2             2     0.03      0.00046
## 3            14     8.81      0.00408
## 4             2     0.11      0.00515
## 5             6     0.95      0.00540
## 6             4     0.74      0.00656
## 7             2     0.12      0.00677
## 8             2     0.13      0.00706
## 9             2     0.17      0.01203
## 10            1     0.01      0.01337
## 11            1     0.01      0.01337
## 12            1     0.01      0.01337
## 13            1     0.01      0.01337
## 14            1     0.01      0.01337
## 15            1     0.01      0.01337
## 16            1     0.01      0.01337
## 17            1     0.01      0.01337
## 18            2     0.18      0.01356
## 19            1     0.02      0.01603
## 20            1     0.02      0.01603
## 21            1     0.02      0.01603
## 22            1     0.02      0.01603
## 23            1     0.02      0.01603
## 24            1     0.02      0.01603
## 25            1     0.02      0.01603
## 26            1     0.02      0.01603
## 27            1     0.02      0.01603
## 28            1     0.02      0.01603
## 29            1     0.02      0.01603
## 30            2     0.20      0.01771
## 31            2     0.34      0.01835
## 32            1     0.02      0.01867
## 33            1     0.02      0.01867
## 34            1     0.02      0.01867
## 35            1     0.02      0.01867
## 36            1     0.02      0.01867
## 37            1     0.02      0.01867
## 38            1     0.02      0.01867
## 39            1     0.02      0.01867
## 40            1     0.02      0.01867
## 41            1     0.02      0.01867
## 42            1     0.02      0.01867
## 43            1     0.02      0.01867
## 44            1     0.02      0.01867
## 45            3     0.32      0.02091
## 46            1     0.02      0.02131
## 47            1     0.02      0.02131
## 48            1     0.02      0.02131
## 49            1     0.02      0.02131
## 50            1     0.02      0.02131
## 51            1     0.02      0.02131
## 52            1     0.02      0.02131
## 53            1     0.02      0.02131
## 54            1     0.02      0.02131
## 55            1     0.02      0.02131
## 56            1     0.02      0.02131
## 57            2     0.05      0.02335
## 58            1     0.02      0.02395
## 59            1     0.02      0.02395
## 60            1     0.02      0.02395
## 61            1     0.02      0.02395
## 62            1     0.02      0.02395
## 63            1     0.02      0.02395
## 64            1     0.02      0.02395
## 65            1     0.02      0.02395
## 66            3     0.33      0.02423
## 67            2     0.25      0.02584
## 68            1     0.03      0.02657
## 69            1     0.03      0.02657
## 70            1     0.03      0.02657
## 71            1     0.03      0.02657
## 72            1     0.03      0.02657
## 73            1     0.03      0.02919
## 74            1     0.03      0.02919
## 75            1     0.03      0.02919
## 76            1     0.03      0.02919
## 77            1     0.03      0.02919
## 78            1     0.03      0.03180
## 79            1     0.03      0.03180
## 80            1     0.03      0.03180
## 81            1     0.03      0.03180
## 82            1     0.03      0.03180
## 83            1     0.03      0.03180
## 84            1     0.03      0.03180
## 85            1     0.03      0.03441
## 86            1     0.03      0.03441
## 87            1     0.03      0.03441
## 88            1     0.03      0.03441
## 89            2     0.10      0.03614
## 90            1     0.04      0.03701
## 91            1     0.04      0.03960
## 92            1     0.04      0.03960
## 93            1     0.04      0.03960
## 94            1     0.04      0.03960
## 95            2     0.32      0.03999
## 96            1     0.04      0.04218
## 97            1     0.04      0.04218
## 98            1     0.04      0.04218
## 99            1     0.04      0.04218
## 100           1     0.04      0.04218
## 101           5     1.95      0.04391
## 102           4     0.79      0.04460
## 103           1     0.05      0.04476
## 104           1     0.05      0.04476
## 105           1     0.05      0.04733
## 106           1     0.05      0.04733
## 107           1     0.05      0.04733
## 108           1     0.05      0.04990
## 109           1     0.05      0.04990
## 110          19    13.64      0.04991
# Keep only the GO identifiers and their weightFisher values from the current
# go_table. cbind() on the two extracted columns gives an unnamed two-column
# matrix, so the resulting data frame columns are V1 (GO.ID) and V2
# (weightFisher).
go_table_heart <- as.data.frame(
  cbind(go_table[["GO.ID"]], go_table[["weightFisher"]])
)

# Save the two columns as plain text: no quoting, no row names, no header.
write.table(
  x = go_table_heart,
  file = "../data/go_table_heart.txt",
  quote = FALSE,
  row.names = FALSE,
  col.names = FALSE
)

# Get the significant genes behind each enriched GO term.
# NOTE: the original comment said "kidney", but this block runs before the
# "Kidney" section and uses the go_data/go_table that feed go_table_heart,
# so these are the heart results.
sig.genes <- sigGenes(go_data)

# For every GO ID in go_table, keep only the annotated genes that are also in
# sig.genes. simplify = FALSE guarantees a list named by GO ID; plain sapply()
# would collapse the result to a vector/matrix whenever every term happens to
# have the same number of significant genes, changing what
# goresults["GO:..."] returns.
goresults <- sapply(
  go_table$GO.ID,
  function(go_id) {
    term_genes <- genesInTerm(go_data, go_id)
    term_genes[[1]][term_genes[[1]] %in% sig.genes]
  },
  simplify = FALSE
)

# Look up the significant genes for selected GO terms of interest:
# cardiac myofibril assembly, positive regulation of heart rate, regulation of monocyte differentiation
# (GO:0055003 and GO:0045655 are rows 99 and 104 of the table printed above.)
goresults["GO:0055003"] 
## $`GO:0055003`
## [1] "ENSG00000068305"
# GO:0010460 is not among names(goresults), so single-bracket indexing returns
# a <NA>-named NULL entry rather than a gene list.
# NOTE(review): presumably this is the "positive regulation of heart rate"
# term from the comment above -- confirm the ID.
goresults["GO:0010460"]
## $<NA>
## NULL
goresults["GO:0045655"]
## $`GO:0045655`
## [1] "ENSG00000185507"

Kidney

# Kidney comparison: keep the rows labelled "hyper" in V4 whose V6 is not "-1".
# NOTE(review): per the note at the top of this file, tDMRs labelled
# "hypermethylated" are actually hypomethylated for kidney, which is why the
# filter uses "hyper" here -- confirm against tissue_specific_tDMRs.
heart_hypo <- with(
  human_chimp_heart_specific_25_conserved_H3K27_KIDNEY,
  human_chimp_heart_specific_25_conserved_H3K27_KIDNEY[
    which(V4 == "hyper" & V6 != "-1"),
  ]
)

# Drop every row that contains a missing value.
heart_hypo <- heart_hypo[complete.cases(heart_hypo), ]

# Find the closest gene: run bedtools "closest" (through bedr) between the
# first three columns of the filtered regions and the refGene TSS table.
closest_heart <- bedr(
  input = list(a = heart_hypo[, 1:3], b = refGene_hg19_TSS),
  method = "closest",
  check.chr = FALSE
)
##  * Processing input (1): a
## CONVERT TO BED
##  * Checking input type... PASS
##    Input seems to be in bed format but chr/start/end column names are missing
## VALIDATE REGIONS
##  * Check if index is a string... PASS
##  * Check index pattern... PASS
##  * Check for missing values... PASS
##  * Check for larger start position... PASS.
##  * Check if zero based... PASS
##  * Checking sort order... PASS
##  * Checking for overlapping 'contiguous' regions... FAIL
##    The input for object has overlapping features!
##    This can cause unexpected results for some set operations.
##    i.e. x <- bedr.merge.region(x)
##  * Processing input (2): b
## CONVERT TO BED
##  * Checking input type... PASS
##    Input seems to be in bed format but chr/start/end column names are missing
## VALIDATE REGIONS
##  * Check if index is a string... PASS
##  * Check index pattern... PASS
##  * Check for missing values... PASS
##  * Check for larger start position... PASS.
##  * Check if zero based... PASS
##  * Checking sort order... FAIL
##    The input for object is not *lexographically* ordered!
##    This can cause unexpected results for some set operations.
##    try: x <- bedr.sort.region(x)
##  * Checking for overlapping 'contiguous' regions... FAIL
##    The input for object has overlapping features!
##    This can cause unexpected results for some set operations.
##    i.e. x <- bedr.merge.region(x)
##    bedtools closest -a /var/folders/rf/qrcw6ncj05z1pc_pq9xzw3540000gn/T//RtmpXK3I56/a_bc55c1f3227.bed -b /var/folders/rf/qrcw6ncj05z1pc_pq9xzw3540000gn/T//RtmpXK3I56/b_bc553f2d81a4.bed
# Convert the gene name to ensg: load the gene-name / ENSG lookup table.
gene_id <- read.table(
  "../../../Reg_Evo_Primates/data/ENSG_GENE_HG19.csv",
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE
)

# Inner-join the closest-gene hits (gene name in column V8) with the lookup
# table (gene name in column Gene); hits without a match are dropped.
comb_kidney <- merge(closest_heart, gene_id, by.x = "V8", by.y = "Gene")

# Print the ENSG ID of every matched row (duplicates are expected when several
# regions share the same closest gene).
comb_kidney$ensg
##   [1] "ENSG00000081760" "ENSG00000198691" "ENSG00000100997"
##   [4] "ENSG00000100997" "ENSG00000117148" "ENSG00000169717"
##   [7] "ENSG00000169717" "ENSG00000139567" "ENSG00000145536"
##  [10] "ENSG00000134917" "ENSG00000105963" "ENSG00000162104"
##  [13] "ENSG00000164252" "ENSG00000063438" "ENSG00000063438"
##  [16] "ENSG00000110711" "ENSG00000196581" "ENSG00000196581"
##  [19] "ENSG00000179841" "ENSG00000053371" "ENSG00000151360"
##  [22] "ENSG00000178038" "ENSG00000178038" "ENSG00000106927"
##  [25] "ENSG00000159461" "ENSG00000089053" "ENSG00000138613"
##  [28] "ENSG00000138613" "ENSG00000198576" "ENSG00000147799"
##  [31] "ENSG00000116017" "ENSG00000029153" "ENSG00000029153"
##  [34] "ENSG00000029153" "ENSG00000029153" "ENSG00000029153"
##  [37] "ENSG00000117407" "ENSG00000117407" "ENSG00000117407"
##  [40] "ENSG00000126775" "ENSG00000126775" "ENSG00000068650"
##  [43] "ENSG00000068650" "ENSG00000068650" "ENSG00000068650"
##  [46] "ENSG00000241837" "ENSG00000241837" "ENSG00000158321"
##  [49] "ENSG00000158321" "ENSG00000183778" "ENSG00000183778"
##  [52] "ENSG00000183778" "ENSG00000183778" "ENSG00000183778"
##  [55] "ENSG00000128298" "ENSG00000125492" "ENSG00000125492"
##  [58] "ENSG00000125124" "ENSG00000050820" "ENSG00000127152"
##  [61] "ENSG00000127152" "ENSG00000110987" "ENSG00000110987"
##  [64] "ENSG00000116128" "ENSG00000162373" "ENSG00000165626"
##  [67] "ENSG00000165626" "ENSG00000122870" "ENSG00000122870"
##  [70] "ENSG00000136717" "ENSG00000136717" "ENSG00000136717"
##  [73] "ENSG00000136717" "ENSG00000136717" "ENSG00000136717"
##  [76] "ENSG00000136717" "ENSG00000136717" "ENSG00000136717"
##  [79] "ENSG00000136717" "ENSG00000136717" "ENSG00000136717"
##  [82] "ENSG00000136717" "ENSG00000136717" "ENSG00000136717"
##  [85] "ENSG00000136717" "ENSG00000136717" "ENSG00000136717"
##  [88] "ENSG00000136717" "ENSG00000136717" "ENSG00000125845"
##  [91] "ENSG00000101144" "ENSG00000104221" "ENSG00000165863"
##  [94] "ENSG00000109944" "ENSG00000109944" "ENSG00000173064"
##  [97] "ENSG00000235162" "ENSG00000089916" "ENSG00000089916"
## [100] "ENSG00000184601" "ENSG00000180336" "ENSG00000131943"
## [103] "ENSG00000175262" "ENSG00000128346" "ENSG00000185056"
## [106] "ENSG00000146521" "ENSG00000074410" "ENSG00000074410"
## [109] "ENSG00000158966" "ENSG00000198286" "ENSG00000130940"
## [112] "ENSG00000130940" "ENSG00000160200" "ENSG00000135736"
## [115] "ENSG00000168491" "ENSG00000168491" "ENSG00000109881"
## [118] "ENSG00000109881" "ENSG00000135127" "ENSG00000135127"
## [121] "ENSG00000135127" "ENSG00000110092" "ENSG00000138764"
## [124] "ENSG00000126353" "ENSG00000146731" "ENSG00000146731"
## [127] "ENSG00000112149" "ENSG00000112149" "ENSG00000198752"
## [130] "ENSG00000184661" "ENSG00000166589" "ENSG00000166589"
## [133] "ENSG00000166589" "ENSG00000166589" "ENSG00000179242"
## [136] "ENSG00000179242" "ENSG00000113100" "ENSG00000227767"
## [139] "ENSG00000153046" "ENSG00000101489" "ENSG00000101489"
## [142] "ENSG00000101489" "ENSG00000101489" "ENSG00000134873"
## [145] "ENSG00000134873" "ENSG00000159261" "ENSG00000169583"
## [148] "ENSG00000120885" "ENSG00000148842" "ENSG00000148842"
## [151] "ENSG00000148842" "ENSG00000149972" "ENSG00000149972"
## [154] "ENSG00000149972" "ENSG00000134871" "ENSG00000130635"
## [157] "ENSG00000165644" "ENSG00000198612" "ENSG00000198612"
## [160] "ENSG00000091704" "ENSG00000091704" "ENSG00000169372"
## [163] "ENSG00000146592" "ENSG00000146592" "ENSG00000160202"
## [166] "ENSG00000060069" "ENSG00000060069" "ENSG00000134030"
## [169] "ENSG00000134030" "ENSG00000153015" "ENSG00000153015"
## [172] "ENSG00000153015" "ENSG00000019186" "ENSG00000019186"
## [175] "ENSG00000132437" "ENSG00000079785" "ENSG00000141141"
## [178] "ENSG00000160049" "ENSG00000160049" "ENSG00000109016"
## [181] "ENSG00000100697" "ENSG00000151240" "ENSG00000150672"
## [184] "ENSG00000151208" "ENSG00000080845" "ENSG00000119689"
## [187] "ENSG00000119689" "ENSG00000119689" "ENSG00000119689"
## [190] "ENSG00000143006" "ENSG00000079805" "ENSG00000079805"
## [193] "ENSG00000079805" "ENSG00000079805" "ENSG00000079805"
## [196] "ENSG00000130226" "ENSG00000156162" "ENSG00000149636"
## [199] "ENSG00000149636" "ENSG00000149636" "ENSG00000047579"
## [202] "ENSG00000047579" "ENSG00000047579" "ENSG00000143507"
## [205] "ENSG00000143507" "ENSG00000229847" "ENSG00000229847"
## [208] "ENSG00000229847" "ENSG00000164778" "ENSG00000167280"
## [211] "ENSG00000104714" "ENSG00000119715" "ENSG00000119715"
## [214] "ENSG00000072840" "ENSG00000064655" "ENSG00000064655"
## [217] "ENSG00000088926" "ENSG00000168309" "ENSG00000168309"
## [220] "ENSG00000184731" "ENSG00000196814" "ENSG00000196814"
## [223] "ENSG00000154153" "ENSG00000189292" "ENSG00000182183"
## [226] "ENSG00000115363" "ENSG00000047662" "ENSG00000219438"
## [229] "ENSG00000168672" "ENSG00000083857" "ENSG00000083857"
## [232] "ENSG00000165140" "ENSG00000112787" "ENSG00000112787"
## [235] "ENSG00000085265" "ENSG00000126266" "ENSG00000066468"
## [238] "ENSG00000066468" "ENSG00000066468" "ENSG00000126500"
## [241] "ENSG00000075426" "ENSG00000164916" "ENSG00000114861"
## [244] "ENSG00000150893" "ENSG00000033170" "ENSG00000166206"
## [247] "ENSG00000116717" "ENSG00000116717" "ENSG00000116717"
## [250] "ENSG00000116717" "ENSG00000116717" "ENSG00000116717"
## [253] "ENSG00000143641" "ENSG00000182870" "ENSG00000130700"
## [256] "ENSG00000168505" "ENSG00000165702" "ENSG00000165702"
## [259] "ENSG00000167741" "ENSG00000167741" "ENSG00000074047"
## [262] "ENSG00000107249" "ENSG00000151948" "ENSG00000151948"
## [265] "ENSG00000137198" "ENSG00000156049" "ENSG00000197177"
## [268] "ENSG00000077585" "ENSG00000158292" "ENSG00000119714"
## [271] "ENSG00000178075" "ENSG00000125651" "ENSG00000077235"
## [274] "ENSG00000069812" "ENSG00000166135" "ENSG00000180448"
## [277] "ENSG00000108753" "ENSG00000108753" "ENSG00000108753"
## [280] "ENSG00000108753" "ENSG00000136720" "ENSG00000165868"
## [283] "ENSG00000165868" "ENSG00000003147" "ENSG00000003147"
## [286] "ENSG00000003147" "ENSG00000160223" "ENSG00000172201"
## [289] "ENSG00000172201" "ENSG00000134049" "ENSG00000117154"
## [292] "ENSG00000117154" "ENSG00000185950" "ENSG00000113430"
## [295] "ENSG00000113430" "ENSG00000105655" "ENSG00000105655"
## [298] "ENSG00000105655" "ENSG00000188385" "ENSG00000154118"
## [301] "ENSG00000107104" "ENSG00000102781" "ENSG00000189337"
## [304] "ENSG00000115041" "ENSG00000169427" "ENSG00000053918"
## [307] "ENSG00000134504" "ENSG00000167977" "ENSG00000127663"
## [310] "ENSG00000127663" "ENSG00000119537" "ENSG00000131149"
## [313] "ENSG00000136051" "ENSG00000257093" "ENSG00000122778"
## [316] "ENSG00000122778" "ENSG00000140950" "ENSG00000173214"
## [319] "ENSG00000130294" "ENSG00000066735" "ENSG00000066735"
## [322] "ENSG00000067082" "ENSG00000067082" "ENSG00000067082"
## [325] "ENSG00000067082" "ENSG00000150361" "ENSG00000197705"
## [328] "ENSG00000053747" "ENSG00000053747" "ENSG00000091136"
## [331] "ENSG00000168961" "ENSG00000168961" "ENSG00000168961"
## [334] "ENSG00000132130" "ENSG00000073350" "ENSG00000073350"
## [337] "ENSG00000073350" "ENSG00000103227" "ENSG00000103227"
## [340] "ENSG00000113368" "ENSG00000163380" "ENSG00000136944"
## [343] "ENSG00000136944" "ENSG00000136944" "ENSG00000167210"
## [346] "ENSG00000153395" "ENSG00000136141" "ENSG00000136141"
## [349] "ENSG00000136141" "ENSG00000146006" "ENSG00000146006"
## [352] "ENSG00000166159" "ENSG00000166159" "ENSG00000166159"
## [355] "ENSG00000166159" "ENSG00000166159" "ENSG00000166159"
## [358] "ENSG00000119681" "ENSG00000002822" "ENSG00000002822"
## [361] "ENSG00000002822" "ENSG00000178573" "ENSG00000178573"
## [364] "ENSG00000185022" "ENSG00000172469" "ENSG00000135525"
## [367] "ENSG00000135525" "ENSG00000135525" "ENSG00000138834"
## [370] "ENSG00000138834" "ENSG00000137337" "ENSG00000085871"
## [373] "ENSG00000085871" "ENSG00000085871" "ENSG00000085871"
## [376] "ENSG00000249567" "ENSG00000221323" "ENSG00000207588"
## [379] "ENSG00000105926" "ENSG00000158186" "ENSG00000182170"
## [382] "ENSG00000153944" "ENSG00000138823" "ENSG00000129422"
## [385] "ENSG00000129422" "ENSG00000132938" "ENSG00000133055"
## [388] "ENSG00000169994" "ENSG00000187556" "ENSG00000196498"
## [391] "ENSG00000196498" "ENSG00000196498" "ENSG00000196498"
## [394] "ENSG00000196498" "ENSG00000196498" "ENSG00000104419"
## [397] "ENSG00000104419" "ENSG00000104419" "ENSG00000104419"
## [400] "ENSG00000115286" "ENSG00000103154" "ENSG00000109320"
## [403] "ENSG00000109320" "ENSG00000148826" "ENSG00000148826"
## [406] "ENSG00000106410" "ENSG00000106410" "ENSG00000074771"
## [409] "ENSG00000170485" "ENSG00000214285" "ENSG00000105954"
## [412] "ENSG00000182667" "ENSG00000182667" "ENSG00000182667"
## [415] "ENSG00000115758" "ENSG00000205927" "ENSG00000183715"
## [418] "ENSG00000115947" "ENSG00000178602" "ENSG00000172818"
## [421] "ENSG00000125850" "ENSG00000099864" "ENSG00000099864"
## [424] "ENSG00000116183" "ENSG00000116183" "ENSG00000137819"
## [427] "ENSG00000102699" "ENSG00000242265" "ENSG00000242265"
## [430] "ENSG00000142655" "ENSG00000142655" "ENSG00000067057"
## [433] "ENSG00000144824" "ENSG00000165443" "ENSG00000105229"
## [436] "ENSG00000126822" "ENSG00000171680" "ENSG00000114554"
## [439] "ENSG00000114554" "ENSG00000114554" "ENSG00000141682"
## [442] "ENSG00000124225" "ENSG00000122512" "ENSG00000122512"
## [445] "ENSG00000168081" "ENSG00000014138" "ENSG00000166169"
## [448] "ENSG00000166169" "ENSG00000166169" "ENSG00000166169"
## [451] "ENSG00000106536" "ENSG00000106536" "ENSG00000162407"
## [454] "ENSG00000147535" "ENSG00000147535" "ENSG00000147535"
## [457] "ENSG00000154001" "ENSG00000170325" "ENSG00000170325"
## [460] "ENSG00000110851" "ENSG00000175785" "ENSG00000111725"
## [463] "ENSG00000106617" "ENSG00000065675" "ENSG00000065675"
## [466] "ENSG00000111218" "ENSG00000111218" "ENSG00000155066"
## [469] "ENSG00000155066" "ENSG00000155066" "ENSG00000205352"
## [472] "ENSG00000205352" "ENSG00000205352" "ENSG00000205352"
## [475] "ENSG00000167157" "ENSG00000163636" "ENSG00000169398"
## [478] "ENSG00000169398" "ENSG00000169398" "ENSG00000134644"
## [481] "ENSG00000134644" "ENSG00000112531" "ENSG00000112531"
## [484] "ENSG00000112531" "ENSG00000112531" "ENSG00000107560"
## [487] "ENSG00000168461" "ENSG00000141542" "ENSG00000017797"
## [490] "ENSG00000136828" "ENSG00000136828" "ENSG00000136828"
## [493] "ENSG00000185989" "ENSG00000165105" "ENSG00000122035"
## [496] "ENSG00000122965" "ENSG00000122965" "ENSG00000122965"
## [499] "ENSG00000160957" "ENSG00000143954" "ENSG00000143954"
## [502] "ENSG00000143954" "ENSG00000159788" "ENSG00000091844"
## [505] "ENSG00000158106" "ENSG00000183421" "ENSG00000139797"
## [508] "ENSG00000113269" "ENSG00000069667" "ENSG00000100784"
## [511] "ENSG00000100784" "ENSG00000141564" "ENSG00000141564"
## [514] "ENSG00000165526" "ENSG00000165526" "ENSG00000182010"
## [517] "ENSG00000198838" "ENSG00000198838" "ENSG00000256463"
## [520] "ENSG00000256463" "ENSG00000256463" "ENSG00000149021"
## [523] "ENSG00000136546" "ENSG00000136546" "ENSG00000146555"
## [526] "ENSG00000146555" "ENSG00000187764" "ENSG00000187764"
## [529] "ENSG00000163904" "ENSG00000152217" "ENSG00000183576"
## [532] "ENSG00000183576" "ENSG00000225383" "ENSG00000164690"
## [535] "ENSG00000142178" "ENSG00000159263" "ENSG00000159263"
## [538] "ENSG00000159263" "ENSG00000159263" "ENSG00000113504"
## [541] "ENSG00000152779" "ENSG00000162241" "ENSG00000162241"
## [544] "ENSG00000173262" "ENSG00000157765" "ENSG00000157765"
## [547] "ENSG00000160190" "ENSG00000143036" "ENSG00000143036"
## [550] "ENSG00000143036" "ENSG00000162426" "ENSG00000066230"
## [553] "ENSG00000173930" "ENSG00000166949" "ENSG00000127616"
## [556] "ENSG00000127616" "ENSG00000127616" "ENSG00000127616"
## [559] "ENSG00000188176" "ENSG00000048471" "ENSG00000172803"
## [562] "ENSG00000184557" "ENSG00000125398" "ENSG00000125398"
## [565] "ENSG00000185594" "ENSG00000187678" "ENSG00000187678"
## [568] "ENSG00000124783" "ENSG00000149418" "ENSG00000133121"
## [571] "ENSG00000133121" "ENSG00000102572" "ENSG00000152953"
## [574] "ENSG00000015592" "ENSG00000165730" "ENSG00000165730"
## [577] "ENSG00000165730" "ENSG00000165730" "ENSG00000198203"
## [580] "ENSG00000198203" "ENSG00000117614" "ENSG00000117614"
## [583] "ENSG00000131018" "ENSG00000131018" "ENSG00000078269"
## [586] "ENSG00000143028" "ENSG00000130699" "ENSG00000164691"
## [589] "ENSG00000164691" "ENSG00000164691" "ENSG00000167291"
## [592] "ENSG00000095383" "ENSG00000095383" "ENSG00000095383"
## [595] "ENSG00000095383" "ENSG00000095383" "ENSG00000095383"
## [598] "ENSG00000006638" "ENSG00000006638" "ENSG00000187621"
## [601] "ENSG00000160180" "ENSG00000163235" "ENSG00000163235"
## [604] "ENSG00000041988" "ENSG00000041988" "ENSG00000146426"
## [607] "ENSG00000106829" "ENSG00000155957" "ENSG00000144120"
## [610] "ENSG00000144120" "ENSG00000144120" "ENSG00000151353"
## [613] "ENSG00000151353" "ENSG00000214128" "ENSG00000160055"
## [616] "ENSG00000136205" "ENSG00000162341" "ENSG00000115705"
## [619] "ENSG00000115705" "ENSG00000126602" "ENSG00000167632"
## [622] "ENSG00000071575" "ENSG00000071575" "ENSG00000137699"
## [625] "ENSG00000100815" "ENSG00000103671" "ENSG00000119121"
## [628] "ENSG00000154743" "ENSG00000154743" "ENSG00000179981"
## [631] "ENSG00000182463" "ENSG00000121297" "ENSG00000214063"
## [634] "ENSG00000032389" "ENSG00000123607" "ENSG00000105948"
## [637] "ENSG00000105948" "ENSG00000105948" "ENSG00000100154"
## [640] "ENSG00000143367" "ENSG00000143367" "ENSG00000233608"
## [643] "ENSG00000184787" "ENSG00000184787" "ENSG00000184787"
## [646] "ENSG00000178473" "ENSG00000115446" "ENSG00000113763"
## [649] "ENSG00000101558" "ENSG00000101558" "ENSG00000129003"
## [652] "ENSG00000129003" "ENSG00000129003" "ENSG00000129003"
## [655] "ENSG00000176428" "ENSG00000185274" "ENSG00000185274"
## [658] "ENSG00000185274" "ENSG00000065268" "ENSG00000160193"
## [661] "ENSG00000103175" "ENSG00000198373" "ENSG00000076924"
## [664] "ENSG00000196584" "ENSG00000130733" "ENSG00000184828"
## [667] "ENSG00000213588" "ENSG00000014164" "ENSG00000175048"
## [670] "ENSG00000175048" "ENSG00000153786" "ENSG00000153786"
## [673] "ENSG00000156639" "ENSG00000156639" "ENSG00000133858"
## [676] "ENSG00000139800" "ENSG00000015171" "ENSG00000015171"
## [679] "ENSG00000102935" "ENSG00000102935" "ENSG00000183621"
## [682] "ENSG00000183621" "ENSG00000183621" "ENSG00000183621"
## [685] "ENSG00000183621" "ENSG00000183621" "ENSG00000183621"
## [688] "ENSG00000197857" "ENSG00000197857" "ENSG00000225614"
## [691] "ENSG00000225614" "ENSG00000171425" "ENSG00000157657"
## [694] "ENSG00000157657" "ENSG00000183779" "ENSG00000183779"
# Convert the TSS to ensg as well: join the refGene TSS table (gene name in
# column V5) with the gene-name / ENSG lookup table.
ref_gene_hg19_ensg <- merge(
  refGene_hg19_TSS,
  gene_id,
  by.x = "V5",
  by.y = "Gene"
)

# Unique ENSG IDs across all TSSs.
all_ref_gene_hg19_ensg <- unique(ref_gene_hg19_ensg$ensg)

# TRUE for each of those IDs that also appears among the kidney closest-gene
# hits, FALSE otherwise (same length and order as all_ref_gene_hg19_ensg).
kidney_ref_gene <- is.element(all_ref_gene_hg19_ensg, comb_kidney$ensg)

# Revisions- run GO
# Encode gene membership as 0/1 scores named by ENSG ID.
# FIX: this used heart_ref_gene, so the kidney enrichment silently re-ran the
# heart gene set (the kidney table printed below is identical to the top of
# the heart table). Use the kidney_ref_gene vector computed just above.
# NOTE: the rendered output that follows was produced before this fix and
# must be regenerated.
test_gene <- as.numeric(as.vector(kidney_ref_gene))
names(test_gene) <- all_ref_gene_hg19_ensg

# Run topGO on the Biological Process ontology; a gene counts as selected
# when its score is > 0, i.e. it was flagged TRUE in kidney_ref_gene.
go_data <- new("topGOdata",
               ontology = "BP",
               allGenes = test_gene,
               geneSel = function(allScore) {
                 allScore > 0
               },
               nodeSize = 5,
               annotationFun = annFUN.org,
               mapping = "org.Hs.eg.db",
               ID = "ensembl")
## 
## Building most specific GOs .....
##  ( 11450 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 15456 GO terms and 36153 relations. )
## 
## Annotating nodes ...............
##  ( 14505 genes annotated to the GO terms. )
# Perform enrichment test: Fisher statistic scored with the weight01 algorithm
# on the current go_data object.
go_test <- runTest(
  go_data,
  algorithm = "weight01",
  statistic = "fisher"
)
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 1568 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 15:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 14:  11 nodes to be scored   (0 eliminated genes)
## 
##   Level 13:  29 nodes to be scored   (103 eliminated genes)
## 
##   Level 12:  50 nodes to be scored   (298 eliminated genes)
## 
##   Level 11:  72 nodes to be scored   (2501 eliminated genes)
## 
##   Level 10:  107 nodes to be scored  (3937 eliminated genes)
## 
##   Level 9:   161 nodes to be scored  (5541 eliminated genes)
## 
##   Level 8:   201 nodes to be scored  (7251 eliminated genes)
## 
##   Level 7:   245 nodes to be scored  (8558 eliminated genes)
## 
##   Level 6:   265 nodes to be scored  (10615 eliminated genes)
## 
##   Level 5:   217 nodes to be scored  (12035 eliminated genes)
## 
##   Level 4:   127 nodes to be scored  (13480 eliminated genes)
## 
##   Level 3:   63 nodes to be scored   (13985 eliminated genes)
## 
##   Level 2:   18 nodes to be scored   (14177 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (14301 eliminated genes)
# Tabulate the GO terms ordered and ranked by the weightFisher score; the
# number of rows equals the count of terms whose score is below 0.01.
go_table <- GenTable(
  go_data,
  weightFisher = go_test,
  orderBy = "weightFisher",
  ranksOf = "weightFisher",
  topNodes = sum(score(go_test) < 0.01)
)

# Print the table.
go_table
##        GO.ID                                        Term Annotated
## 1 GO:0009113      purine nucleobase biosynthetic process        12
## 2 GO:0033197                       response to vitamin E        12
## 3 GO:0006351                transcription, DNA-templated      3277
## 4 GO:0032007        negative regulation of TOR signaling        40
## 5 GO:0043547      positive regulation of GTPase activity       354
## 6 GO:0060070             canonical Wnt signaling pathway       274
## 7 GO:1903078 positive regulation of protein localizat...        46
## 8 GO:0071320                   cellular response to cAMP        47
##   Significant Expected weightFisher
## 1           2     0.03      0.00046
## 2           2     0.03      0.00046
## 3          14     8.81      0.00408
## 4           2     0.11      0.00515
## 5           6     0.95      0.00540
## 6           4     0.74      0.00656
## 7           2     0.12      0.00677
## 8           2     0.13      0.00706
# FIX: goresults was last built in the heart section and never refreshed after
# go_data / go_table were reassigned above, so this lookup was reading stale
# heart results. Rebuild the per-term significant-gene list from the current
# go_data / go_table first. simplify = FALSE keeps the result a list named by
# GO ID regardless of how many genes each term has.
# NOTE: the rendered output below predates this fix and must be regenerated.
sig.genes <- sigGenes(go_data)
goresults <- sapply(
  go_table$GO.ID,
  function(go_id) {
    term_genes <- genesInTerm(go_data, go_id)
    term_genes[[1]][term_genes[[1]] %in% sig.genes]
  },
  simplify = FALSE
)

# Returns a <NA>-named NULL entry when the term is not a row of go_table.
goresults["GO:0098719"]
## $<NA>
## NULL

Liver

# Liver comparison: keep the rows labelled "hypo" in V4 whose V6 is not "-1".
heart_hypo <- with(
  human_chimp_heart_specific_25_conserved_H3K27_LIVER,
  human_chimp_heart_specific_25_conserved_H3K27_LIVER[
    which(V4 == "hypo" & V6 != "-1"),
  ]
)

# Drop every row that contains a missing value.
heart_hypo <- heart_hypo[complete.cases(heart_hypo), ]

# Find the closest gene: run bedtools "closest" (through bedr) between the
# first three columns of the filtered regions and the refGene TSS table.
closest_heart <- bedr(
  input = list(a = heart_hypo[, 1:3], b = refGene_hg19_TSS),
  method = "closest",
  check.chr = FALSE
)
##  * Processing input (1): a
## CONVERT TO BED
##  * Checking input type... PASS
##    Input seems to be in bed format but chr/start/end column names are missing
## VALIDATE REGIONS
##  * Check if index is a string... PASS
##  * Check index pattern... PASS
##  * Check for missing values... PASS
##  * Check for larger start position... PASS.
##  * Check if zero based... PASS
##  * Checking sort order... PASS
##  * Checking for overlapping 'contiguous' regions... FAIL
##    The input for object has overlapping features!
##    This can cause unexpected results for some set operations.
##    i.e. x <- bedr.merge.region(x)
##  * Processing input (2): b
## CONVERT TO BED
##  * Checking input type... PASS
##    Input seems to be in bed format but chr/start/end column names are missing
## VALIDATE REGIONS
##  * Check if index is a string... PASS
##  * Check index pattern... PASS
##  * Check for missing values... PASS
##  * Check for larger start position... PASS.
##  * Check if zero based... PASS
##  * Checking sort order... FAIL
##    The input for object is not *lexographically* ordered!
##    This can cause unexpected results for some set operations.
##    try: x <- bedr.sort.region(x)
##  * Checking for overlapping 'contiguous' regions... FAIL
##    The input for object has overlapping features!
##    This can cause unexpected results for some set operations.
##    i.e. x <- bedr.merge.region(x)
##    bedtools closest -a /var/folders/rf/qrcw6ncj05z1pc_pq9xzw3540000gn/T//RtmpXK3I56/a_bc556d78f000.bed -b /var/folders/rf/qrcw6ncj05z1pc_pq9xzw3540000gn/T//RtmpXK3I56/b_bc556f7b3abb.bed
# Convert the gene name to ensg: (re)load the gene-name / ENSG lookup table.
gene_id <- read.table(
  "../../../Reg_Evo_Primates/data/ENSG_GENE_HG19.csv",
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE
)

# Inner-join the closest-gene hits (gene name in column V8) with the lookup
# table (gene name in column Gene). The object keeps the name comb_kidney,
# but closest_heart was just rebuilt from the LIVER table, so it now holds
# the liver hits and overwrites the kidney ones.
comb_kidney <- merge(closest_heart, gene_id, by.x = "V8", by.y = "Gene")

# Print the ENSG ID of every matched row.
comb_kidney$ensg
##    [1] "ENSG00000121410" "ENSG00000114771" "ENSG00000114771"
##    [4] "ENSG00000141338" "ENSG00000121270" "ENSG00000108846"
##    [7] "ENSG00000108846" "ENSG00000108846" "ENSG00000108846"
##   [10] "ENSG00000138075" "ENSG00000076555" "ENSG00000076555"
##   [13] "ENSG00000181513" "ENSG00000181513" "ENSG00000181513"
##   [16] "ENSG00000181513" "ENSG00000122729" "ENSG00000197142"
##   [19] "ENSG00000183549" "ENSG00000143199" "ENSG00000143199"
##   [22] "ENSG00000143199" "ENSG00000143199" "ENSG00000162104"
##   [25] "ENSG00000183077" "ENSG00000183077" "ENSG00000183077"
##   [28] "ENSG00000157985" "ENSG00000157985" "ENSG00000157985"
##   [31] "ENSG00000165923" "ENSG00000165923" "ENSG00000165923"
##   [34] "ENSG00000144891" "ENSG00000144891" "ENSG00000144891"
##   [37] "ENSG00000144891" "ENSG00000172482" "ENSG00000113492"
##   [40] "ENSG00000042286" "ENSG00000042286" "ENSG00000011243"
##   [43] "ENSG00000106948" "ENSG00000198610" "ENSG00000198610"
##   [46] "ENSG00000142208" "ENSG00000023330" "ENSG00000023330"
##   [49] "ENSG00000170017" "ENSG00000170017" "ENSG00000170017"
##   [52] "ENSG00000170017" "ENSG00000170017" "ENSG00000170017"
##   [55] "ENSG00000170017" "ENSG00000170017" "ENSG00000033011"
##   [58] "ENSG00000198796" "ENSG00000136383" "ENSG00000136383"
##   [61] "ENSG00000162551" "ENSG00000162551" "ENSG00000162551"
##   [64] "ENSG00000106927" "ENSG00000139344" "ENSG00000166126"
##   [67] "ENSG00000166025" "ENSG00000166025" "ENSG00000116337"
##   [70] "ENSG00000116337" "ENSG00000174945" "ENSG00000116194"
##   [73] "ENSG00000116194" "ENSG00000198483" "ENSG00000154945"
##   [76] "ENSG00000131620" "ENSG00000131620" "ENSG00000074855"
##   [79] "ENSG00000042753" "ENSG00000042753" "ENSG00000132703"
##   [82] "ENSG00000110243" "ENSG00000110243" "ENSG00000084674"
##   [85] "ENSG00000175336" "ENSG00000118520" "ENSG00000118520"
##   [88] "ENSG00000118520" "ENSG00000118520" "ENSG00000196843"
##   [91] "ENSG00000133794" "ENSG00000133794" "ENSG00000133794"
##   [94] "ENSG00000136950" "ENSG00000111339" "ENSG00000111339"
##   [97] "ENSG00000100325" "ENSG00000100325" "ENSG00000141505"
##  [100] "ENSG00000141505" "ENSG00000141505" "ENSG00000141505"
##  [103] "ENSG00000161944" "ENSG00000161944" "ENSG00000161944"
##  [106] "ENSG00000161944" "ENSG00000161944" "ENSG00000126522"
##  [109] "ENSG00000126522" "ENSG00000126522" "ENSG00000169696"
##  [112] "ENSG00000169696" "ENSG00000169696" "ENSG00000169136"
##  [115] "ENSG00000169136" "ENSG00000157087" "ENSG00000157087"
##  [118] "ENSG00000186009" "ENSG00000107518" "ENSG00000204842"
##  [121] "ENSG00000158321" "ENSG00000119986" "ENSG00000109956"
##  [124] "ENSG00000109956" "ENSG00000182272" "ENSG00000117411"
##  [127] "ENSG00000156273" "ENSG00000175866" "ENSG00000175866"
##  [130] "ENSG00000175866" "ENSG00000175866" "ENSG00000255056"
##  [133] "ENSG00000172530" "ENSG00000172530" "ENSG00000172530"
##  [136] "ENSG00000172530" "ENSG00000172530" "ENSG00000114200"
##  [139] "ENSG00000153094" "ENSG00000153094" "ENSG00000153094"
##  [142] "ENSG00000153094" "ENSG00000153094" "ENSG00000153094"
##  [145] "ENSG00000153094" "ENSG00000153094" "ENSG00000153094"
##  [148] "ENSG00000153094" "ENSG00000153094" "ENSG00000153094"
##  [151] "ENSG00000153094" "ENSG00000153094" "ENSG00000153094"
##  [154] "ENSG00000153094" "ENSG00000153094" "ENSG00000153094"
##  [157] "ENSG00000121380" "ENSG00000121380" "ENSG00000100739"
##  [160] "ENSG00000015475" "ENSG00000015475" "ENSG00000136717"
##  [163] "ENSG00000136717" "ENSG00000136717" "ENSG00000136717"
##  [166] "ENSG00000136717" "ENSG00000136717" "ENSG00000136717"
##  [169] "ENSG00000136717" "ENSG00000136717" "ENSG00000136717"
##  [172] "ENSG00000115760" "ENSG00000197299" "ENSG00000168487"
##  [175] "ENSG00000168487" "ENSG00000168487" "ENSG00000168487"
##  [178] "ENSG00000140299" "ENSG00000145919" "ENSG00000145919"
##  [181] "ENSG00000176720" "ENSG00000234235" "ENSG00000214688"
##  [184] "ENSG00000110696" "ENSG00000111412" "ENSG00000173064"
##  [187] "ENSG00000214900" "ENSG00000128944" "ENSG00000128944"
##  [190] "ENSG00000128944" "ENSG00000166780" "ENSG00000125149"
##  [193] "ENSG00000125149" "ENSG00000174109" "ENSG00000185504"
##  [196] "ENSG00000185504" "ENSG00000168675" "ENSG00000168675"
##  [199] "ENSG00000168675" "ENSG00000168675" "ENSG00000168675"
##  [202] "ENSG00000168675" "ENSG00000130813" "ENSG00000130813"
##  [205] "ENSG00000130173" "ENSG00000168275" "ENSG00000159403"
##  [208] "ENSG00000159403" "ENSG00000182326" "ENSG00000182326"
##  [211] "ENSG00000166278" "ENSG00000166278" "ENSG00000166278"
##  [214] "ENSG00000166278" "ENSG00000101474" "ENSG00000128254"
##  [217] "ENSG00000215012" "ENSG00000100249" "ENSG00000115998"
##  [220] "ENSG00000204128" "ENSG00000215217" "ENSG00000039537"
##  [223] "ENSG00000146521" "ENSG00000244291" "ENSG00000170279"
##  [226] "ENSG00000157131" "ENSG00000021852" "ENSG00000021852"
##  [229] "ENSG00000021852" "ENSG00000176919" "ENSG00000113600"
##  [232] "ENSG00000134508" "ENSG00000134508" "ENSG00000154040"
##  [235] "ENSG00000154040" "ENSG00000154040" "ENSG00000154040"
##  [238] "ENSG00000154040" "ENSG00000171735" "ENSG00000171735"
##  [241] "ENSG00000171735" "ENSG00000171735" "ENSG00000042493"
##  [244] "ENSG00000110888" "ENSG00000110888" "ENSG00000110888"
##  [247] "ENSG00000110888" "ENSG00000110888" "ENSG00000213995"
##  [250] "ENSG00000213995" "ENSG00000142273" "ENSG00000142273"
##  [253] "ENSG00000139899" "ENSG00000139899" "ENSG00000160200"
##  [256] "ENSG00000160200" "ENSG00000144648" "ENSG00000005059"
##  [259] "ENSG00000160799" "ENSG00000144395" "ENSG00000165972"
##  [262] "ENSG00000161573" "ENSG00000110092" "ENSG00000110092"
##  [265] "ENSG00000177575" "ENSG00000177575" "ENSG00000177575"
##  [268] "ENSG00000177575" "ENSG00000134061" "ENSG00000178562"
##  [271] "ENSG00000178562" "ENSG00000178562" "ENSG00000110651"
##  [274] "ENSG00000151465" "ENSG00000099804" "ENSG00000198752"
##  [277] "ENSG00000149798" "ENSG00000170779" "ENSG00000170779"
##  [280] "ENSG00000136807" "ENSG00000166446" "ENSG00000153879"
##  [283] "ENSG00000153879" "ENSG00000241832" "ENSG00000149187"
##  [286] "ENSG00000011523" "ENSG00000011523" "ENSG00000159398"
##  [289] "ENSG00000159398" "ENSG00000087237" "ENSG00000243649"
##  [292] "ENSG00000165410" "ENSG00000165410" "ENSG00000165410"
##  [295] "ENSG00000016391" "ENSG00000016391" "ENSG00000085872"
##  [298] "ENSG00000177830" "ENSG00000177830" "ENSG00000177830"
##  [301] "ENSG00000177830" "ENSG00000177830" "ENSG00000110721"
##  [304] "ENSG00000110721" "ENSG00000104879" "ENSG00000159261"
##  [307] "ENSG00000159261" "ENSG00000125246" "ENSG00000091317"
##  [310] "ENSG00000091317" "ENSG00000170293" "ENSG00000150656"
##  [313] "ENSG00000133313" "ENSG00000168763" "ENSG00000168763"
##  [316] "ENSG00000158158" "ENSG00000184144" "ENSG00000106078"
##  [319] "ENSG00000196739" "ENSG00000118004" "ENSG00000118004"
##  [322] "ENSG00000118004" "ENSG00000118004" "ENSG00000118004"
##  [325] "ENSG00000118004" "ENSG00000131143" "ENSG00000178772"
##  [328] "ENSG00000178772" "ENSG00000178772" "ENSG00000100884"
##  [331] "ENSG00000157184" "ENSG00000167193" "ENSG00000167193"
##  [334] "ENSG00000178585" "ENSG00000178585" "ENSG00000040531"
##  [337] "ENSG00000040531" "ENSG00000111249" "ENSG00000111249"
##  [340] "ENSG00000154639" "ENSG00000154639" "ENSG00000154639"
##  [343] "ENSG00000154639" "ENSG00000154639" "ENSG00000166347"
##  [346] "ENSG00000166347" "ENSG00000166347" "ENSG00000166347"
##  [349] "ENSG00000166347" "ENSG00000166347" "ENSG00000166394"
##  [352] "ENSG00000140505" "ENSG00000135929" "ENSG00000135929"
##  [355] "ENSG00000135929" "ENSG00000138115" "ENSG00000138115"
##  [358] "ENSG00000138115" "ENSG00000138115" "ENSG00000106258"
##  [361] "ENSG00000106258" "ENSG00000106258" "ENSG00000106258"
##  [364] "ENSG00000106258" "ENSG00000106258" "ENSG00000106258"
##  [367] "ENSG00000106258" "ENSG00000106258" "ENSG00000106258"
##  [370] "ENSG00000106258" "ENSG00000106258" "ENSG00000106258"
##  [373] "ENSG00000106258" "ENSG00000106258" "ENSG00000106258"
##  [376] "ENSG00000186115" "ENSG00000171954" "ENSG00000171954"
##  [379] "ENSG00000186529" "ENSG00000186529" "ENSG00000186529"
##  [382] "ENSG00000164488" "ENSG00000112977" "ENSG00000172992"
##  [385] "ENSG00000043093" "ENSG00000132437" "ENSG00000100201"
##  [388] "ENSG00000100201" "ENSG00000141141" "ENSG00000100150"
##  [391] "ENSG00000100150" "ENSG00000077044" "ENSG00000102967"
##  [394] "ENSG00000181192" "ENSG00000181192" "ENSG00000067596"
##  [397] "ENSG00000184047" "ENSG00000184047" "ENSG00000184047"
##  [400] "ENSG00000184047" "ENSG00000258498" "ENSG00000258498"
##  [403] "ENSG00000258498" "ENSG00000066084" "ENSG00000108176"
##  [406] "ENSG00000108176" "ENSG00000163687" "ENSG00000163687"
##  [409] "ENSG00000167130" "ENSG00000167130" "ENSG00000167130"
##  [412] "ENSG00000184845" "ENSG00000143507" "ENSG00000161326"
##  [415] "ENSG00000108861" "ENSG00000107404" "ENSG00000105204"
##  [418] "ENSG00000105204" "ENSG00000105204" "ENSG00000123179"
##  [421] "ENSG00000134463" "ENSG00000229715" "ENSG00000229715"
##  [424] "ENSG00000142634" "ENSG00000169242" "ENSG00000169242"
##  [427] "ENSG00000099617" "ENSG00000172889" "ENSG00000172889"
##  [430] "ENSG00000146648" "ENSG00000146648" "ENSG00000146648"
##  [433] "ENSG00000146648" "ENSG00000171570" "ENSG00000173812"
##  [436] "ENSG00000106263" "ENSG00000106263" "ENSG00000066044"
##  [439] "ENSG00000196361" "ENSG00000196361" "ENSG00000155849"
##  [442] "ENSG00000155849" "ENSG00000155849" "ENSG00000134759"
##  [445] "ENSG00000134759" "ENSG00000134759" "ENSG00000134759"
##  [448] "ENSG00000134759" "ENSG00000134759" "ENSG00000134759"
##  [451] "ENSG00000160963" "ENSG00000167136" "ENSG00000116016"
##  [454] "ENSG00000182580" "ENSG00000196411" "ENSG00000196411"
##  [457] "ENSG00000130427" "ENSG00000113719" "ENSG00000086619"
##  [460] "ENSG00000139684" "ENSG00000196405" "ENSG00000205436"
##  [463] "ENSG00000107371" "ENSG00000107371" "ENSG00000182197"
##  [466] "ENSG00000092820" "ENSG00000126218" "ENSG00000180210"
##  [469] "ENSG00000159784" "ENSG00000109794" "ENSG00000152102"
##  [472] "ENSG00000185442" "ENSG00000188916" "ENSG00000189319"
##  [475] "ENSG00000133477" "ENSG00000188522" "ENSG00000176853"
##  [478] "ENSG00000169710" "ENSG00000177294" "ENSG00000100225"
##  [481] "ENSG00000100225" "ENSG00000171557" "ENSG00000171557"
##  [484] "ENSG00000115226" "ENSG00000160097" "ENSG00000160097"
##  [487] "ENSG00000170802" "ENSG00000106701" "ENSG00000106701"
##  [490] "ENSG00000070404" "ENSG00000160282" "ENSG00000160282"
##  [493] "ENSG00000107164" "ENSG00000165060" "ENSG00000165060"
##  [496] "ENSG00000165060" "ENSG00000163251" "ENSG00000131482"
##  [499] "ENSG00000131482" "ENSG00000154252" "ENSG00000108479"
##  [502] "ENSG00000106648" "ENSG00000106648" "ENSG00000128310"
##  [505] "ENSG00000130005" "ENSG00000130005" "ENSG00000111640"
##  [508] "ENSG00000213512" "ENSG00000213512" "ENSG00000106633"
##  [511] "ENSG00000106633" "ENSG00000084734" "ENSG00000023909"
##  [514] "ENSG00000178795" "ENSG00000141098" "ENSG00000141098"
##  [517] "ENSG00000141098" "ENSG00000151892" "ENSG00000115486"
##  [520] "ENSG00000115486" "ENSG00000157017" "ENSG00000157017"
##  [523] "ENSG00000157017" "ENSG00000157017" "ENSG00000157017"
##  [526] "ENSG00000157017" "ENSG00000157017" "ENSG00000157017"
##  [529] "ENSG00000157017" "ENSG00000157017" "ENSG00000137960"
##  [532] "ENSG00000151948" "ENSG00000016864" "ENSG00000016864"
##  [535] "ENSG00000203972" "ENSG00000196743" "ENSG00000196743"
##  [538] "ENSG00000112699" "ENSG00000114349" "ENSG00000114349"
##  [541] "ENSG00000100522" "ENSG00000185245" "ENSG00000119927"
##  [544] "ENSG00000119927" "ENSG00000119927" "ENSG00000119927"
##  [547] "ENSG00000112293" "ENSG00000132975" "ENSG00000180758"
##  [550] "ENSG00000166123" "ENSG00000176153" "ENSG00000176153"
##  [553] "ENSG00000176153" "ENSG00000075240" "ENSG00000083307"
##  [556] "ENSG00000139835" "ENSG00000100577" "ENSG00000077809"
##  [559] "ENSG00000077809" "ENSG00000077809" "ENSG00000077809"
##  [562] "ENSG00000077809" "ENSG00000101181" "ENSG00000169919"
##  [565] "ENSG00000130600" "ENSG00000148702" "ENSG00000148702"
##  [568] "ENSG00000148702" "ENSG00000130956" "ENSG00000084110"
##  [571] "ENSG00000084110" "ENSG00000084110" "ENSG00000115677"
##  [574] "ENSG00000115677" "ENSG00000122557" "ENSG00000114315"
##  [577] "ENSG00000114315" "ENSG00000168509" "ENSG00000168509"
##  [580] "ENSG00000168509" "ENSG00000168509" "ENSG00000109758"
##  [583] "ENSG00000137133" "ENSG00000100084" "ENSG00000108924"
##  [586] "ENSG00000117305" "ENSG00000117305" "ENSG00000115756"
##  [589] "ENSG00000158104" "ENSG00000110169" "ENSG00000110169"
##  [592] "ENSG00000113905" "ENSG00000113905" "ENSG00000005700"
##  [595] "ENSG00000115738" "ENSG00000142166" "ENSG00000142166"
##  [598] "ENSG00000115457" "ENSG00000243646" "ENSG00000115590"
##  [601] "ENSG00000115590" "ENSG00000196083" "ENSG00000196083"
##  [604] "ENSG00000196083" "ENSG00000196083" "ENSG00000196083"
##  [607] "ENSG00000196083" "ENSG00000196083" "ENSG00000196083"
##  [610] "ENSG00000196083" "ENSG00000196083" "ENSG00000196083"
##  [613] "ENSG00000196083" "ENSG00000196083" "ENSG00000196083"
##  [616] "ENSG00000196083" "ENSG00000196083" "ENSG00000196083"
##  [619] "ENSG00000196083" "ENSG00000196083" "ENSG00000196083"
##  [622] "ENSG00000196083" "ENSG00000196083" "ENSG00000196083"
##  [625] "ENSG00000196083" "ENSG00000136689" "ENSG00000103522"
##  [628] "ENSG00000103522" "ENSG00000100385" "ENSG00000203485"
##  [631] "ENSG00000203485" "ENSG00000203485" "ENSG00000122641"
##  [634] "ENSG00000148384" "ENSG00000254647" "ENSG00000254647"
##  [637] "ENSG00000254647" "ENSG00000129965" "ENSG00000129965"
##  [640] "ENSG00000186480" "ENSG00000186480" "ENSG00000186480"
##  [643] "ENSG00000186480" "ENSG00000186480" "ENSG00000186480"
##  [646] "ENSG00000125629" "ENSG00000134070" "ENSG00000134070"
##  [649] "ENSG00000172183" "ENSG00000078747" "ENSG00000078747"
##  [652] "ENSG00000078747" "ENSG00000083457" "ENSG00000083457"
##  [655] "ENSG00000162267" "ENSG00000055955" "ENSG00000055955"
##  [658] "ENSG00000100605" "ENSG00000100605" "ENSG00000100605"
##  [661] "ENSG00000205726" "ENSG00000205726" "ENSG00000163166"
##  [664] "ENSG00000009765" "ENSG00000009765" "ENSG00000009765"
##  [667] "ENSG00000140044" "ENSG00000154118" "ENSG00000102781"
##  [670] "ENSG00000176407" "ENSG00000069424" "ENSG00000158445"
##  [673] "ENSG00000124780" "ENSG00000169427" "ENSG00000188997"
##  [676] "ENSG00000089094" "ENSG00000089094" "ENSG00000107077"
##  [679] "ENSG00000107077" "ENSG00000166783" "ENSG00000166783"
##  [682] "ENSG00000166783" "ENSG00000176542" "ENSG00000125337"
##  [685] "ENSG00000125337" "ENSG00000118922" "ENSG00000129911"
##  [688] "ENSG00000128607" "ENSG00000128607" "ENSG00000025800"
##  [691] "ENSG00000185896" "ENSG00000002549" "ENSG00000107929"
##  [694] "ENSG00000086730" "ENSG00000086730" "ENSG00000086730"
##  [697] "ENSG00000213398" "ENSG00000164406" "ENSG00000164406"
##  [700] "ENSG00000145826" "ENSG00000116977" "ENSG00000072163"
##  [703] "ENSG00000166035" "ENSG00000166035" "ENSG00000166035"
##  [706] "ENSG00000101670" "ENSG00000189067" "ENSG00000189067"
##  [709] "ENSG00000162761" "ENSG00000102910" "ENSG00000134324"
##  [712] "ENSG00000134324" "ENSG00000070018" "ENSG00000175489"
##  [715] "ENSG00000160233" "ENSG00000124831" "ENSG00000124831"
##  [718] "ENSG00000124831" "ENSG00000124831" "ENSG00000143669"
##  [721] "ENSG00000197063" "ENSG00000145050" "ENSG00000101460"
##  [724] "ENSG00000076984" "ENSG00000119487" "ENSG00000119487"
##  [727] "ENSG00000119487" "ENSG00000119487" "ENSG00000119487"
##  [730] "ENSG00000119487" "ENSG00000099785" "ENSG00000099785"
##  [733] "ENSG00000099785" "ENSG00000009724" "ENSG00000009724"
##  [736] "ENSG00000197971" "ENSG00000197971" "ENSG00000197971"
##  [739] "ENSG00000197971" "ENSG00000126217" "ENSG00000180398"
##  [742] "ENSG00000180398" "ENSG00000128285" "ENSG00000090674"
##  [745] "ENSG00000135679" "ENSG00000135679" "ENSG00000135679"
##  [748] "ENSG00000135679" "ENSG00000108510" "ENSG00000152127"
##  [751] "ENSG00000170430" "ENSG00000199065" "ENSG00000199065"
##  [754] "ENSG00000199065" "ENSG00000221039" "ENSG00000221063"
##  [757] "ENSG00000208017" "ENSG00000199075" "ENSG00000207727"
##  [760] "ENSG00000207875" "ENSG00000207875" "ENSG00000130382"
##  [763] "ENSG00000115648" "ENSG00000115648" "ENSG00000009950"
##  [766] "ENSG00000009950" "ENSG00000009950" "ENSG00000009950"
##  [769] "ENSG00000108960" "ENSG00000173269" "ENSG00000075643"
##  [772] "ENSG00000166391" "ENSG00000166391" "ENSG00000107186"
##  [775] "ENSG00000107186" "ENSG00000107186" "ENSG00000214026"
##  [778] "ENSG00000173867" "ENSG00000173531" "ENSG00000149480"
##  [781] "ENSG00000122085" "ENSG00000122085" "ENSG00000122085"
##  [784] "ENSG00000122085" "ENSG00000132613" "ENSG00000110921"
##  [787] "ENSG00000110921" "ENSG00000172927" "ENSG00000172927"
##  [790] "ENSG00000106436" "ENSG00000065534" "ENSG00000065534"
##  [793] "ENSG00000065534" "ENSG00000065534" "ENSG00000091536"
##  [796] "ENSG00000091536" "ENSG00000236242" "ENSG00000139597"
##  [799] "ENSG00000139597" "ENSG00000008130" "ENSG00000008130"
##  [802] "ENSG00000152620" "ENSG00000141562" "ENSG00000141562"
##  [805] "ENSG00000141562" "ENSG00000166833" "ENSG00000255043"
##  [808] "ENSG00000255043" "ENSG00000255043" "ENSG00000196498"
##  [811] "ENSG00000196498" "ENSG00000196498" "ENSG00000166579"
##  [814] "ENSG00000166579" "ENSG00000151366" "ENSG00000151366"
##  [817] "ENSG00000151366" "ENSG00000165802" "ENSG00000165802"
##  [820] "ENSG00000165802" "ENSG00000165802" "ENSG00000165802"
##  [823] "ENSG00000173848" "ENSG00000107954" "ENSG00000100906"
##  [826] "ENSG00000104825" "ENSG00000104825" "ENSG00000101004"
##  [829] "ENSG00000163293" "ENSG00000172548" "ENSG00000172548"
##  [832] "ENSG00000136783" "ENSG00000169251" "ENSG00000169251"
##  [835] "ENSG00000166741" "ENSG00000015520" "ENSG00000015520"
##  [838] "ENSG00000015520" "ENSG00000015520" "ENSG00000148734"
##  [841] "ENSG00000242349" "ENSG00000131910" "ENSG00000012504"
##  [844] "ENSG00000012504" "ENSG00000012504" "ENSG00000012504"
##  [847] "ENSG00000012504" "ENSG00000012504" "ENSG00000012504"
##  [850] "ENSG00000012504" "ENSG00000144852" "ENSG00000144852"
##  [853] "ENSG00000144852" "ENSG00000143257" "ENSG00000143257"
##  [856] "ENSG00000143257" "ENSG00000143257" "ENSG00000143257"
##  [859] "ENSG00000143257" "ENSG00000143257" "ENSG00000143257"
##  [862] "ENSG00000143257" "ENSG00000143257" "ENSG00000143257"
##  [865] "ENSG00000143257" "ENSG00000143257" "ENSG00000143257"
##  [868] "ENSG00000143257" "ENSG00000143257" "ENSG00000143257"
##  [871] "ENSG00000143257" "ENSG00000143257" "ENSG00000143257"
##  [874] "ENSG00000143257" "ENSG00000143257" "ENSG00000143257"
##  [877] "ENSG00000143257" "ENSG00000143257" "ENSG00000143257"
##  [880] "ENSG00000143257" "ENSG00000143257" "ENSG00000143257"
##  [883] "ENSG00000143257" "ENSG00000113580" "ENSG00000113580"
##  [886] "ENSG00000113580" "ENSG00000113580" "ENSG00000113580"
##  [889] "ENSG00000113580" "ENSG00000113580" "ENSG00000113580"
##  [892] "ENSG00000113580" "ENSG00000113580" "ENSG00000113580"
##  [895] "ENSG00000113580" "ENSG00000113580" "ENSG00000113580"
##  [898] "ENSG00000113580" "ENSG00000113580" "ENSG00000113580"
##  [901] "ENSG00000113580" "ENSG00000113580" "ENSG00000113580"
##  [904] "ENSG00000113580" "ENSG00000113580" "ENSG00000116833"
##  [907] "ENSG00000116833" "ENSG00000116833" "ENSG00000116833"
##  [910] "ENSG00000116833" "ENSG00000116833" "ENSG00000180530"
##  [913] "ENSG00000171119" "ENSG00000151413" "ENSG00000090273"
##  [916] "ENSG00000168101" "ENSG00000168101" "ENSG00000168101"
##  [919] "ENSG00000168101" "ENSG00000132182" "ENSG00000125450"
##  [922] "ENSG00000176046" "ENSG00000176046" "ENSG00000065154"
##  [925] "ENSG00000065154" "ENSG00000180304" "ENSG00000111325"
##  [928] "ENSG00000060491" "ENSG00000119547" "ENSG00000119547"
##  [931] "ENSG00000224855" "ENSG00000175619" "ENSG00000140961"
##  [934] "ENSG00000185624" "ENSG00000179364" "ENSG00000100266"
##  [937] "ENSG00000006712" "ENSG00000006712" "ENSG00000006712"
##  [940] "ENSG00000140694" "ENSG00000140694" "ENSG00000140694"
##  [943] "ENSG00000140694" "ENSG00000140694" "ENSG00000140694"
##  [946] "ENSG00000140694" "ENSG00000140694" "ENSG00000140694"
##  [949] "ENSG00000111224" "ENSG00000185630" "ENSG00000185630"
##  [952] "ENSG00000185630" "ENSG00000167081" "ENSG00000173599"
##  [955] "ENSG00000166228" "ENSG00000175198" "ENSG00000175198"
##  [958] "ENSG00000175198" "ENSG00000114054" "ENSG00000114054"
##  [961] "ENSG00000140479" "ENSG00000140479" "ENSG00000140479"
##  [964] "ENSG00000140479" "ENSG00000140479" "ENSG00000140479"
##  [967] "ENSG00000140479" "ENSG00000140479" "ENSG00000169174"
##  [970] "ENSG00000116005" "ENSG00000106244" "ENSG00000113721"
##  [973] "ENSG00000113721" "ENSG00000155660" "ENSG00000107438"
##  [976] "ENSG00000133027" "ENSG00000133027" "ENSG00000124299"
##  [979] "ENSG00000124299" "ENSG00000124299" "ENSG00000124299"
##  [982] "ENSG00000124299" "ENSG00000124299" "ENSG00000161031"
##  [985] "ENSG00000107537" "ENSG00000060642" "ENSG00000155629"
##  [988] "ENSG00000155629" "ENSG00000174238" "ENSG00000090975"
##  [991] "ENSG00000178385" "ENSG00000076356" "ENSG00000115946"
##  [994] "ENSG00000101346" "ENSG00000101346" "ENSG00000127948"
##  [997] "ENSG00000168938" "ENSG00000168938" "ENSG00000108179"
## [1000] "ENSG00000118898" "ENSG00000219607" "ENSG00000137713"
## [1003] "ENSG00000137713" "ENSG00000137713" "ENSG00000137713"
## [1006] "ENSG00000137713" "ENSG00000066027" "ENSG00000066027"
## [1009] "ENSG00000066027" "ENSG00000100239" "ENSG00000100239"
## [1012] "ENSG00000100239" "ENSG00000100239" "ENSG00000110075"
## [1015] "ENSG00000110075" "ENSG00000110075" "ENSG00000110075"
## [1018] "ENSG00000110075" "ENSG00000110075" "ENSG00000147596"
## [1021] "ENSG00000116690" "ENSG00000116690" "ENSG00000116690"
## [1024] "ENSG00000116690" "ENSG00000131791" "ENSG00000154229"
## [1027] "ENSG00000067606" "ENSG00000101000" "ENSG00000117707"
## [1030] "ENSG00000117707" "ENSG00000126231" "ENSG00000126231"
## [1033] "ENSG00000161542" "ENSG00000161542" "ENSG00000248405"
## [1036] "ENSG00000080815" "ENSG00000080815" "ENSG00000159352"
## [1039] "ENSG00000180822" "ENSG00000180822" "ENSG00000180822"
## [1042] "ENSG00000124212" "ENSG00000206527" "ENSG00000070159"
## [1045] "ENSG00000070159" "ENSG00000070159" "ENSG00000070159"
## [1048] "ENSG00000060656" "ENSG00000060656" "ENSG00000060656"
## [1051] "ENSG00000060656" "ENSG00000060656" "ENSG00000060656"
## [1054] "ENSG00000060656" "ENSG00000060656" "ENSG00000136045"
## [1057] "ENSG00000136045" "ENSG00000165661" "ENSG00000213339"
## [1060] "ENSG00000172007" "ENSG00000172007" "ENSG00000172007"
## [1063] "ENSG00000160271" "ENSG00000108961" "ENSG00000108961"
## [1066] "ENSG00000108961" "ENSG00000173166" "ENSG00000173166"
## [1069] "ENSG00000105122" "ENSG00000108551" "ENSG00000108551"
## [1072] "ENSG00000165105" "ENSG00000100320" "ENSG00000100320"
## [1075] "ENSG00000184863" "ENSG00000138207" "ENSG00000139547"
## [1078] "ENSG00000139547" "ENSG00000174236" "ENSG00000173156"
## [1081] "ENSG00000111785" "ENSG00000167705" "ENSG00000108830"
## [1084] "ENSG00000108830" "ENSG00000178828" "ENSG00000132383"
## [1087] "ENSG00000125844" "ENSG00000125844" "ENSG00000160214"
## [1090] "ENSG00000163825" "ENSG00000163825" "ENSG00000163602"
## [1093] "ENSG00000020577" "ENSG00000020577" "ENSG00000101347"
## [1096] "ENSG00000123453" "ENSG00000168077" "ENSG00000168077"
## [1099] "ENSG00000073060" "ENSG00000073060" "ENSG00000073060"
## [1102] "ENSG00000073060" "ENSG00000073060" "ENSG00000073060"
## [1105] "ENSG00000143653" "ENSG00000100012" "ENSG00000100012"
## [1108] "ENSG00000100012" "ENSG00000100012" "ENSG00000103184"
## [1111] "ENSG00000254154" "ENSG00000254154" "ENSG00000065665"
## [1114] "ENSG00000065665" "ENSG00000065665" "ENSG00000065665"
## [1117] "ENSG00000065665" "ENSG00000025796" "ENSG00000187742"
## [1120] "ENSG00000187742" "ENSG00000179918" "ENSG00000184640"
## [1123] "ENSG00000197249" "ENSG00000196136" "ENSG00000100665"
## [1126] "ENSG00000117601" "ENSG00000117601" "ENSG00000183576"
## [1129] "ENSG00000183576" "ENSG00000144040" "ENSG00000144040"
## [1132] "ENSG00000144040" "ENSG00000104205" "ENSG00000104205"
## [1135] "ENSG00000130147" "ENSG00000107957" "ENSG00000105251"
## [1138] "ENSG00000105251" "ENSG00000138606" "ENSG00000164690"
## [1141] "ENSG00000164690" "ENSG00000138771" "ENSG00000112246"
## [1144] "ENSG00000169375" "ENSG00000096717" "ENSG00000124523"
## [1147] "ENSG00000141526" "ENSG00000170190" "ENSG00000112337"
## [1150] "ENSG00000112337" "ENSG00000112337" "ENSG00000146039"
## [1153] "ENSG00000110436" "ENSG00000168575" "ENSG00000175003"
## [1156] "ENSG00000175003" "ENSG00000175003" "ENSG00000175003"
## [1159] "ENSG00000149742" "ENSG00000148339" "ENSG00000171612"
## [1162] "ENSG00000171612" "ENSG00000171612" "ENSG00000181035"
## [1165] "ENSG00000140107" "ENSG00000140284" "ENSG00000140284"
## [1168] "ENSG00000083807" "ENSG00000197496" "ENSG00000173262"
## [1171] "ENSG00000196660" "ENSG00000196660" "ENSG00000196660"
## [1174] "ENSG00000196660" "ENSG00000130958" "ENSG00000127526"
## [1177] "ENSG00000138449" "ENSG00000162426" "ENSG00000022567"
## [1180] "ENSG00000022567" "ENSG00000076351" "ENSG00000076351"
## [1183] "ENSG00000139508" "ENSG00000139508" "ENSG00000111181"
## [1186] "ENSG00000111181" "ENSG00000111181" "ENSG00000130876"
## [1189] "ENSG00000130876" "ENSG00000103257" "ENSG00000137834"
## [1192] "ENSG00000137834" "ENSG00000137834" "ENSG00000137834"
## [1195] "ENSG00000103056" "ENSG00000103056" "ENSG00000198742"
## [1198] "ENSG00000198742" "ENSG00000198742" "ENSG00000165684"
## [1201] "ENSG00000201902" "ENSG00000104852" "ENSG00000147481"
## [1204] "ENSG00000142168" "ENSG00000142168" "ENSG00000184985"
## [1207] "ENSG00000184985" "ENSG00000137642" "ENSG00000100146"
## [1210] "ENSG00000153498" "ENSG00000227213" "ENSG00000006282"
## [1213] "ENSG00000006282" "ENSG00000006282" "ENSG00000186583"
## [1216] "ENSG00000186583" "ENSG00000204710" "ENSG00000100014"
## [1219] "ENSG00000072080" "ENSG00000136158" "ENSG00000162032"
## [1222] "ENSG00000173898" "ENSG00000137877" "ENSG00000049319"
## [1225] "ENSG00000182934" "ENSG00000182934" "ENSG00000147488"
## [1228] "ENSG00000166444" "ENSG00000073849" "ENSG00000070731"
## [1231] "ENSG00000133121" "ENSG00000166888" "ENSG00000115107"
## [1234] "ENSG00000115107" "ENSG00000115107" "ENSG00000115107"
## [1237] "ENSG00000115107" "ENSG00000115107" "ENSG00000118046"
## [1240] "ENSG00000204344" "ENSG00000116030" "ENSG00000116030"
## [1243] "ENSG00000116030" "ENSG00000116030" "ENSG00000116030"
## [1246] "ENSG00000116030" "ENSG00000100242" "ENSG00000196235"
## [1249] "ENSG00000173227" "ENSG00000177156" "ENSG00000143374"
## [1252] "ENSG00000135111" "ENSG00000135111" "ENSG00000135111"
## [1255] "ENSG00000135111" "ENSG00000135111" "ENSG00000135111"
## [1258] "ENSG00000011007" "ENSG00000163792" "ENSG00000163792"
## [1261] "ENSG00000148737" "ENSG00000148737" "ENSG00000148737"
## [1264] "ENSG00000148737" "ENSG00000148737" "ENSG00000148737"
## [1267] "ENSG00000148737" "ENSG00000148737" "ENSG00000148737"
## [1270] "ENSG00000148737" "ENSG00000148737" "ENSG00000148737"
## [1273] "ENSG00000148737" "ENSG00000148737" "ENSG00000148737"
## [1276] "ENSG00000148737" "ENSG00000148737" "ENSG00000148737"
## [1279] "ENSG00000148737" "ENSG00000148737" "ENSG00000148737"
## [1282] "ENSG00000148737" "ENSG00000148737" "ENSG00000148737"
## [1285] "ENSG00000148737" "ENSG00000148737" "ENSG00000148737"
## [1288] "ENSG00000148737" "ENSG00000148737" "ENSG00000148737"
## [1291] "ENSG00000148737" "ENSG00000148737" "ENSG00000148737"
## [1294] "ENSG00000148737" "ENSG00000148737" "ENSG00000148737"
## [1297] "ENSG00000148737" "ENSG00000148737" "ENSG00000148737"
## [1300] "ENSG00000148737" "ENSG00000148737" "ENSG00000148737"
## [1303] "ENSG00000148737" "ENSG00000148737" "ENSG00000148737"
## [1306] "ENSG00000148737" "ENSG00000148737" "ENSG00000148737"
## [1309] "ENSG00000148737" "ENSG00000148737" "ENSG00000148737"
## [1312] "ENSG00000148737" "ENSG00000101190" "ENSG00000100109"
## [1315] "ENSG00000100109" "ENSG00000106327" "ENSG00000072274"
## [1318] "ENSG00000125780" "ENSG00000125780" "ENSG00000101158"
## [1321] "ENSG00000079134" "ENSG00000172009" "ENSG00000090534"
## [1324] "ENSG00000090534" "ENSG00000090534" "ENSG00000187720"
## [1327] "ENSG00000221995" "ENSG00000104980" "ENSG00000100234"
## [1330] "ENSG00000150455" "ENSG00000150455" "ENSG00000196781"
## [1333] "ENSG00000140332" "ENSG00000140332" "ENSG00000140332"
## [1336] "ENSG00000135926" "ENSG00000134291" "ENSG00000134291"
## [1339] "ENSG00000134291" "ENSG00000134291" "ENSG00000168936"
## [1342] "ENSG00000168936" "ENSG00000064545" "ENSG00000064545"
## [1345] "ENSG00000106565" "ENSG00000106565" "ENSG00000106565"
## [1348] "ENSG00000187824" "ENSG00000204278" "ENSG00000204278"
## [1351] "ENSG00000204278" "ENSG00000149582" "ENSG00000149582"
## [1354] "ENSG00000149582" "ENSG00000149582" "ENSG00000149582"
## [1357] "ENSG00000149582" "ENSG00000109133" "ENSG00000109133"
## [1360] "ENSG00000177042" "ENSG00000177042" "ENSG00000109084"
## [1363] "ENSG00000187045" "ENSG00000109079" "ENSG00000185361"
## [1366] "ENSG00000028137" "ENSG00000141232" "ENSG00000141232"
## [1369] "ENSG00000136816" "ENSG00000188001" "ENSG00000056558"
## [1372] "ENSG00000131653" "ENSG00000131653" "ENSG00000173334"
## [1375] "ENSG00000173334" "ENSG00000173334" "ENSG00000101255"
## [1378] "ENSG00000234127" "ENSG00000234127" "ENSG00000169871"
## [1381] "ENSG00000144481" "ENSG00000144481" "ENSG00000011105"
## [1384] "ENSG00000011105" "ENSG00000235217" "ENSG00000172425"
## [1387] "ENSG00000100304" "ENSG00000120440" "ENSG00000124120"
## [1390] "ENSG00000124120" "ENSG00000124120" "ENSG00000118271"
## [1393] "ENSG00000136295" "ENSG00000198431" "ENSG00000143569"
## [1396] "ENSG00000078140" "ENSG00000078140" "ENSG00000078140"
## [1399] "ENSG00000169139" "ENSG00000168246" "ENSG00000162543"
## [1402] "ENSG00000162543" "ENSG00000241635" "ENSG00000241635"
## [1405] "ENSG00000105668" "ENSG00000105698" "ENSG00000105698"
## [1408] "ENSG00000140455" "ENSG00000140455" "ENSG00000140455"
## [1411] "ENSG00000140455" "ENSG00000103404" "ENSG00000148429"
## [1414] "ENSG00000132952" "ENSG00000152818" "ENSG00000132612"
## [1417] "ENSG00000141252" "ENSG00000141252" "ENSG00000109072"
## [1420] "ENSG00000109072" "ENSG00000110799" "ENSG00000186153"
## [1423] "ENSG00000186153" "ENSG00000186153" "ENSG00000114127"
## [1426] "ENSG00000114127" "ENSG00000175155" "ENSG00000205189"
## [1429] "ENSG00000205189" "ENSG00000169155" "ENSG00000169155"
## [1432] "ENSG00000184828" "ENSG00000141664" "ENSG00000153786"
## [1435] "ENSG00000153786" "ENSG00000099904" "ENSG00000099904"
## [1438] "ENSG00000140836" "ENSG00000185650" "ENSG00000185650"
## [1441] "ENSG00000100711" "ENSG00000100711" "ENSG00000108175"
## [1444] "ENSG00000122515" "ENSG00000085644" "ENSG00000171940"
## [1447] "ENSG00000168813" "ENSG00000168813" "ENSG00000185252"
## [1450] "ENSG00000185252" "ENSG00000185252" "ENSG00000185252"
## [1453] "ENSG00000185252" "ENSG00000188372" "ENSG00000070476"
## [1456] "ENSG00000070476"
# Join the TSS table to the gene_id lookup (TSS column "V5" against lookup
# column "Gene") so that each TSS row carries an ensg identifier.
ref_gene_hg19_ensg <- merge(
  refGene_hg19_TSS,
  gene_id,
  by.x = "V5",
  by.y = "Gene"
)

# Background gene set for the enrichment test: every ensg value, once each.
all_ref_gene_hg19_ensg <- unique(ref_gene_hg19_ensg$ensg)

# TRUE for each background gene that is also listed in comb_kidney$ensg.
# NOTE(review): the variable is named "liver" but is built from comb_kidney;
# confirm that this is the intended gene list.
liver_ref_gene <- all_ref_gene_hg19_ensg %in% comb_kidney$ensg

# Revisions - run GO
# Encode membership as 1 (in the list) / 0 (background only) and name each
# score by its ensg identifier.
test_gene <- setNames(as.numeric(liver_ref_gene), all_ref_gene_hg19_ensg)

# Build the topGO input object for the Biological Process ontology.
# allGenes is the named 0/1 score vector; geneSel marks a gene as selected
# when its score is positive. Only GO nodes with at least 5 annotated genes
# are kept (nodeSize), and annotations are looked up by Ensembl ID through
# org.Hs.eg.db.
go_data <- new(
  "topGOdata",
  ontology = "BP",
  allGenes = test_gene,
  geneSel = function(allScore) allScore > 0,
  nodeSize = 5,
  annotationFun = annFUN.org,
  mapping = "org.Hs.eg.db",
  ID = "ensembl"
)
## 
## Building most specific GOs .....
##  ( 11450 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 15456 GO terms and 36153 relations. )
## 
## Annotating nodes ...............
##  ( 14505 genes annotated to the GO terms. )
# Score the GO nodes: Fisher statistic combined with the weight01 algorithm.
go_test <- runTest(
  go_data,
  algorithm = "weight01",
  statistic = "fisher"
)
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 6020 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 18:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 17:  4 nodes to be scored    (0 eliminated genes)
## 
##   Level 16:  20 nodes to be scored   (8 eliminated genes)
## 
##   Level 15:  57 nodes to be scored   (71 eliminated genes)
## 
##   Level 14:  116 nodes to be scored  (316 eliminated genes)
## 
##   Level 13:  208 nodes to be scored  (911 eliminated genes)
## 
##   Level 12:  308 nodes to be scored  (1979 eliminated genes)
## 
##   Level 11:  502 nodes to be scored  (4148 eliminated genes)
## 
##   Level 10:  677 nodes to be scored  (5660 eliminated genes)
## 
##   Level 9:   819 nodes to be scored  (7855 eliminated genes)
## 
##   Level 8:   848 nodes to be scored  (9971 eliminated genes)
## 
##   Level 7:   873 nodes to be scored  (11436 eliminated genes)
## 
##   Level 6:   739 nodes to be scored  (12657 eliminated genes)
## 
##   Level 5:   470 nodes to be scored  (13331 eliminated genes)
## 
##   Level 4:   250 nodes to be scored  (13846 eliminated genes)
## 
##   Level 3:   105 nodes to be scored  (14079 eliminated genes)
## 
##   Level 2:   22 nodes to be scored   (14207 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (14324 eliminated genes)
# Tabulate the enrichment results, ordered and ranked by the weightFisher
# score. topNodes is the number of GO terms whose score is below 0.05, so the
# table lists exactly those terms.
go_table <- GenTable(
  go_data,
  weightFisher = go_test,
  topNodes = sum(score(go_test) < 0.05),
  orderBy = "weightFisher",
  ranksOf = "weightFisher"
)

go_table
##          GO.ID                                        Term Annotated
## 1   GO:0030449         regulation of complement activation        41
## 2   GO:0042632                     cholesterol homeostasis        68
## 3   GO:0006953                        acute-phase response        42
## 4   GO:0006958    complement activation, classical pathway        28
## 5   GO:0070328                    triglyceride homeostasis        29
## 6   GO:0009820                  alkaloid metabolic process         5
## 7   GO:0002576                      platelet degranulation       120
## 8   GO:0043691               reverse cholesterol transport        16
## 9   GO:0034384 high-density lipoprotein particle cleara...        11
## 10  GO:0006641              triglyceride metabolic process        90
## 11  GO:0055089                      fatty acid homeostasis        12
## 12  GO:0046628 positive regulation of insulin receptor ...        13
## 13  GO:0006879               cellular iron ion homeostasis        57
## 14  GO:0006957 complement activation, alternative pathw...        14
## 15  GO:0034375 high-density lipoprotein particle remode...        15
## 16  GO:0008203               cholesterol metabolic process       121
## 17  GO:0090277 positive regulation of peptide hormone s...        79
## 18  GO:0006898               receptor-mediated endocytosis       264
## 19  GO:0008202                   steroid metabolic process       265
## 20  GO:0006768                    biotin metabolic process        10
## 21  GO:0034372 very-low-density lipoprotein particle re...        10
## 22  GO:0019835                                   cytolysis        35
## 23  GO:0007597        blood coagulation, intrinsic pathway        17
## 24  GO:1904179 positive regulation of adipose tissue de...         5
## 25  GO:0001907           killing by symbiont of host cells         5
## 26  GO:0006524                   alanine catabolic process         5
## 27  GO:0042738            exogenous drug catabolic process         5
## 28  GO:0001889                           liver development       124
## 29  GO:0007584                        response to nutrient       168
## 30  GO:0010951 negative regulation of endopeptidase act...       213
## 31  GO:0017187        peptidyl-glutamic acid carboxylation        11
## 32  GO:0050892                       intestinal absorption        34
## 33  GO:1904683 regulation of metalloendopeptidase activ...         6
## 34  GO:0015942                   formate metabolic process         6
## 35  GO:0070141                            response to UV-A         6
## 36  GO:0097267              omega-hydroxylase P450 pathway         6
## 37  GO:0006536                 glutamate metabolic process        30
## 38  GO:0061138     morphogenesis of a branching epithelium       168
## 39  GO:0045944 positive regulation of transcription fro...      1010
## 40  GO:0034374 low-density lipoprotein particle remodel...        13
## 41  GO:0043433 negative regulation of DNA binding trans...       130
## 42  GO:0070301      cellular response to hydrogen peroxide        74
## 43  GO:0048538                          thymus development        41
## 44  GO:0060059 embryonic retina morphogenesis in camera...         7
## 45  GO:0045620 negative regulation of lymphocyte differ...        40
## 46  GO:0045717 negative regulation of fatty acid biosyn...        14
## 47  GO:0002244 hematopoietic progenitor cell differenti...       155
## 48  GO:0045540 regulation of cholesterol biosynthetic p...        38
## 49  GO:0034380 high-density lipoprotein particle assemb...        15
## 50  GO:0045725 positive regulation of glycogen biosynth...        15
## 51  GO:0071276            cellular response to cadmium ion        24
## 52  GO:0046620                  regulation of organ growth        89
## 53  GO:1903427 negative regulation of reactive oxygen s...        23
## 54  GO:0010918 positive regulation of mitochondrial mem...         8
## 55  GO:0015886                              heme transport         8
## 56  GO:0051715                 cytolysis in other organism         8
## 57  GO:0006548                 histidine catabolic process         8
## 58  GO:0019216       regulation of lipid metabolic process       332
## 59  GO:0071559 response to transforming growth factor b...       203
## 60  GO:0019695                   choline metabolic process         9
## 61  GO:0010984 regulation of lipoprotein particle clear...        17
## 62  GO:0090218 positive regulation of lipid kinase acti...        31
## 63  GO:0010894 negative regulation of steroid biosynthe...        22
## 64  GO:0046329          negative regulation of JNK cascade        30
## 65  GO:0006739                      NADP metabolic process        33
## 66  GO:0016310                             phosphorylation      2076
## 67  GO:0000098         sulfur amino acid catabolic process         9
## 68  GO:0042908                        xenobiotic transport         9
## 69  GO:0070989                     oxidative demethylation         9
## 70  GO:0030917     midbrain-hindbrain boundary development         9
## 71  GO:0046185                  aldehyde catabolic process         9
## 72  GO:0070857 regulation of bile acid biosynthetic pro...         9
## 73  GO:0055091                    phospholipid homeostasis         9
## 74  GO:0033344                          cholesterol efflux        42
## 75  GO:0071157    negative regulation of cell cycle arrest        18
## 76  GO:0042593                         glucose homeostasis       200
## 77  GO:0006869                             lipid transport       311
## 78  GO:0051004 regulation of lipoprotein lipase activit...        18
## 79  GO:0050665      hydrogen peroxide biosynthetic process        10
## 80  GO:0051712 positive regulation of killing of cells ...        10
## 81  GO:0006853                           carnitine shuttle        10
## 82  GO:1902237 positive regulation of endoplasmic retic...        10
## 83  GO:0001558                   regulation of cell growth       356
## 84  GO:0050730 regulation of peptidyl-tyrosine phosphor...       210
## 85  GO:0042157               lipoprotein metabolic process       164
## 86  GO:0051081                nuclear envelope disassembly        43
## 87  GO:0019359 nicotinamide nucleotide biosynthetic pro...        27
## 88  GO:0034340               response to type I interferon        68
## 89  GO:0051647                        nucleus localization        22
## 90  GO:0001867       complement activation, lectin pathway        11
## 91  GO:0019373                    epoxygenase P450 pathway        11
## 92  GO:0071372 cellular response to follicle-stimulatin...        11
## 93  GO:0006559           L-phenylalanine catabolic process        11
## 94  GO:0045739           positive regulation of DNA repair        44
## 95  GO:0071560 cellular response to transforming growth...       200
## 96  GO:0006855                drug transmembrane transport        21
## 97  GO:0006368 transcription elongation from RNA polyme...        94
## 98  GO:0097421                          liver regeneration        32
## 99  GO:0030514 negative regulation of BMP signaling pat...        44
## 100 GO:0006629                     lipid metabolic process      1267
## 101 GO:0006520       cellular amino acid metabolic process       337
## 102 GO:0043903 regulation of symbiosis, encompassing mu...       197
## 103 GO:0042730                                fibrinolysis        25
## 104 GO:0045830    positive regulation of isotype switching        22
## 105 GO:0099133 ATP hydrolysis coupled anion transmembra...        12
## 106 GO:1900119 positive regulation of execution phase o...        12
## 107 GO:2001241 positive regulation of extrinsic apoptot...        12
## 108 GO:0045721      negative regulation of gluconeogenesis        12
## 109 GO:0006695            cholesterol biosynthetic process        60
## 110 GO:0021915                     neural tube development       153
## 111 GO:0010646            regulation of cell communication      3042
## 112 GO:0042177 negative regulation of protein catabolic...       102
## 113 GO:0033572                       transferrin transport        34
## 114 GO:0022900                    electron transport chain       143
## 115 GO:0042158            lipoprotein biosynthetic process       135
## 116 GO:2000646 positive regulation of receptor cataboli...         5
## 117 GO:0001887         selenium compound metabolic process         5
## 118 GO:0021894 cerebral cortex GABAergic interneuron de...         5
## 119 GO:1901523                 icosanoid catabolic process         5
## 120 GO:0021814 cell motility involved in cerebral corte...         5
## 121 GO:0032430 positive regulation of phospholipase A2 ...         5
## 122 GO:0010991 negative regulation of SMAD protein comp...         5
## 123 GO:0019740                        nitrogen utilization         5
## 124 GO:0060437                                 lung growth         5
## 125 GO:0048625                    myoblast fate commitment         5
## 126 GO:0019626    short-chain fatty acid catabolic process         5
## 127 GO:1903753      negative regulation of p38MAPK cascade         5
## 128 GO:0010040                    response to iron(II) ion         5
## 129 GO:0015911 plasma membrane long-chain fatty acid tr...         5
## 130 GO:0006572                  tyrosine catabolic process         5
## 131 GO:0090324 negative regulation of oxidative phospho...         5
## 132 GO:0031999 negative regulation of fatty acid beta-o...         5
## 133 GO:0090118 receptor-mediated endocytosis involved i...         5
## 134 GO:0072049             comma-shaped body morphogenesis         5
## 135 GO:0010269                    response to selenium ion         5
## 136 GO:0051642                     centrosome localization        23
## 137 GO:0051147 regulation of muscle cell differentiatio...       179
## 138 GO:0034142      toll-like receptor 4 signaling pathway        28
## 139 GO:1900016 negative regulation of cytokine producti...        13
## 140 GO:0071397            cellular response to cholesterol        13
## 141 GO:0016540                      protein autoprocessing        13
## 142 GO:0060712        spongiotrophoblast layer development        13
## 143 GO:0032436 positive regulation of proteasomal ubiqu...        75
## 144 GO:0055114                 oxidation-reduction process       852
## 145 GO:0034504             protein localization to nucleus       341
## 146 GO:0006699              bile acid biosynthetic process        33
## 147 GO:0046718                  viral entry into host cell       102
## 148 GO:2000463 positive regulation of excitatory postsy...        24
## 149 GO:0007411                               axon guidance       219
## 150 GO:0042127            regulation of cell proliferation      1447
## 151 GO:0031325 positive regulation of cellular metaboli...      2748
## 152 GO:0002922 positive regulation of humoral immune re...        14
## 153 GO:0034356 NAD biosynthesis via nicotinamide ribosi...        14
## 154 GO:0042104 positive regulation of activated T cell ...        25
## 155 GO:0032270 positive regulation of cellular protein ...      1352
## 156 GO:0090335 regulation of brown fat cell differentia...        13
## 157 GO:0034763 negative regulation of transmembrane tra...        85
## 158 GO:0071156             regulation of cell cycle arrest        98
## 159 GO:0032375 negative regulation of cholesterol trans...        14
## 160 GO:2000659 regulation of interleukin-1-mediated sig...         6
## 161 GO:1990535               neuron projection maintenance         6
## 162 GO:1902946      protein localization to early endosome         6
## 163 GO:0042940                      D-amino acid transport         6
## 164 GO:0033860      regulation of NAD(P)H oxidase activity         6
## 165 GO:1900107       regulation of nodal signaling pathway         6
## 166 GO:0046498    S-adenosylhomocysteine metabolic process         6
## 167 GO:1905049 negative regulation of metallopeptidase ...         6
## 168 GO:1903896 positive regulation of IRE1-mediated unf...         6
## 169 GO:1902459 positive regulation of stem cell populat...         6
## 170 GO:0042426                   choline catabolic process         6
## 171 GO:0031659 positive regulation of cyclin-dependent ...         6
## 172 GO:0034382               chylomicron remnant clearance         6
## 173 GO:2000343 positive regulation of chemokine (C-X-C ...         6
## 174 GO:0006545                glycine biosynthetic process         6
## 175 GO:0010793      regulation of mRNA export from nucleus         6
## 176 GO:1905668 positive regulation of protein localizat...         6
## 177 GO:1903364 positive regulation of cellular protein ...       186
## 178 GO:0021879            forebrain neuron differentiation        49
## 179 GO:0019432           triglyceride biosynthetic process        31
## 180 GO:0006461                    protein complex assembly      1257
## 181 GO:0021542                   dentate gyrus development        15
## 182 GO:0090201 negative regulation of release of cytoch...        15
## 183 GO:0000060 protein import into nucleus, translocati...        45
## 184 GO:0046326       positive regulation of glucose import        39
## 185 GO:0035774 positive regulation of insulin secretion...        26
## 186 GO:0007628                      adult walking behavior        26
## 187 GO:0032088 negative regulation of NF-kappaB transcr...        66
## 188 GO:0008152                           metabolic process      9852
## 189 GO:0048013           ephrin receptor signaling pathway        81
## 190 GO:0043392          negative regulation of DNA binding        47
## 191 GO:0009887                  animal organ morphogenesis       919
## 192 GO:0032094                            response to food        34
## 193 GO:0021756                        striatum development        16
## 194 GO:0060644 mammary gland epithelial cell differenti...        16
## 195 GO:1903830       magnesium ion transmembrane transport        16
## 196 GO:0007595                                   lactation        40
## 197 GO:0000122 negative regulation of transcription fro...       727
## 198 GO:0097190                 apoptotic signaling pathway       543
## 199 GO:0031663 lipopolysaccharide-mediated signaling pa...        48
## 200 GO:0030178 negative regulation of Wnt signaling pat...       186
## 201 GO:0030195    negative regulation of blood coagulation        46
## 202 GO:0019674                       NAD metabolic process        64
## 203 GO:1902373 negative regulation of mRNA catabolic pr...        40
## 204 GO:0032786 positive regulation of DNA-templated tra...        20
## 205 GO:0033598 mammary gland epithelial cell proliferat...        22
## 206 GO:0034638       phosphatidylcholine catabolic process         7
## 207 GO:0010898 positive regulation of triglyceride cata...         7
## 208 GO:0070544                histone H3-K36 demethylation         7
## 209 GO:0010886 positive regulation of cholesterol stora...         7
## 210 GO:0060052     neurofilament cytoskeleton organization         7
## 211 GO:0021830 interneuron migration from the subpalliu...         7
## 212 GO:1901679          nucleotide transmembrane transport         7
## 213 GO:0000042                  protein targeting to Golgi         7
## 214 GO:0051045 negative regulation of membrane protein ...         7
## 215 GO:1904468 negative regulation of tumor necrosis fa...         7
## 216 GO:0044332 Wnt signaling pathway involved in dorsal...         7
## 217 GO:0035356           cellular triglyceride homeostasis         7
## 218 GO:0032287 peripheral nervous system myelin mainten...         7
## 219 GO:0060463                     lung lobe morphogenesis         7
## 220 GO:0046439                L-cysteine metabolic process         7
## 221 GO:1903874        ferrous iron transmembrane transport         7
## 222 GO:0031666 positive regulation of lipopolysaccharid...         7
## 223 GO:0042167                      heme catabolic process         7
## 224 GO:2000192 negative regulation of fatty acid transp...         7
## 225 GO:0090336 positive regulation of brown fat cell di...         7
## 226 GO:0042758     long-chain fatty acid catabolic process         7
## 227 GO:0030300 regulation of intestinal cholesterol abs...         7
## 228 GO:0006600                  creatine metabolic process         7
## 229 GO:2000271 positive regulation of fibroblast apopto...         7
## 230 GO:0008090                 retrograde axonal transport         7
## 231 GO:0002679 respiratory burst involved in defense re...         7
## 232 GO:0032071 regulation of endodeoxyribonuclease acti...         7
## 233 GO:0043627                        response to estrogen        66
## 234 GO:0051346 negative regulation of hydrolase activit...       362
## 235 GO:0061045        negative regulation of wound healing        63
## 236 GO:0017144                      drug metabolic process        24
## 237 GO:0007041                         lysosomal transport        86
## 238 GO:0001938 positive regulation of endothelial cell ...        70
## 239 GO:0051000 positive regulation of nitric-oxide synt...        17
## 240 GO:1902176 negative regulation of oxidative stress-...        17
## 241 GO:0031100                   animal organ regeneration        74
## 242 GO:0032869       cellular response to insulin stimulus       176
## 243 GO:0032007        negative regulation of TOR signaling        40
## 244 GO:0018279 protein N-linked glycosylation via aspar...        42
## 245 GO:0045599 negative regulation of fat cell differen...        42
## 246 GO:0032148     activation of protein kinase B activity        29
## 247 GO:0001503                                ossification       343
## 248 GO:1902680 positive regulation of RNA biosynthetic ...      1299
## 249 GO:0030324                            lung development       164
## 250 GO:0060021                          palate development        87
## 251 GO:2000257    regulation of protein activation cascade        42
## 252 GO:0042493                            response to drug       373
## 253 GO:0001568                    blood vessel development       570
## 254 GO:0031016                        pancreas development        67
## 255 GO:0006778 porphyrin-containing compound metabolic ...        34
## 256 GO:1900076 regulation of cellular response to insul...        60
## 257 GO:0090239        regulation of histone H4 acetylation        10
## 258 GO:0019627                      urea metabolic process        11
## 259 GO:0015865                 purine nucleotide transport        12
## 260 GO:0061620 glycolytic process through glucose-6-pho...        24
## 261 GO:0050820          positive regulation of coagulation        24
## 262 GO:0006476                       protein deacetylation        92
## 263 GO:0046631                alpha-beta T cell activation       114
## 264 GO:0098902 regulation of membrane depolarization du...         6
## 265 GO:1902959 regulation of aspartic-type endopeptidas...         6
## 266 GO:0052433 modulation by organism of apoptotic proc...         6
## 267 GO:0070943 neutrophil mediated killing of symbiont ...         6
## 268 GO:0046884      follicle-stimulating hormone secretion         6
## 269 GO:0052040 modulation by symbiont of host programme...         6
## 270 GO:0051340               regulation of ligase activity         7
## 271 GO:0035733            hepatic stellate cell activation         7
## 272 GO:0046838 phosphorylated carbohydrate dephosphoryl...        10
## 273 GO:1901160    primary amino compound metabolic process        13
## 274 GO:0036475 neuron death in response to oxidative st...        21
## 275 GO:0042773    ATP synthesis coupled electron transport        66
## 276 GO:0010039                        response to iron ion        33
## 277 GO:0006505                GPI anchor metabolic process        28
## 278 GO:1900182 positive regulation of protein localizat...       124
## 279 GO:0052652 cyclic purine nucleotide metabolic proce...       145
## 280 GO:0035909                         aorta morphogenesis        30
## 281 GO:0014911 positive regulation of smooth muscle cel...        30
## 282 GO:0035435       phosphate ion transmembrane transport        18
## 283 GO:0006465                   signal peptide processing        18
## 284 GO:0061003 positive regulation of dendritic spine m...        18
## 285 GO:0035264               multicellular organism growth       137
## 286 GO:0014068 positive regulation of phosphatidylinosi...        58
## 287 GO:0043405           regulation of MAP kinase activity       303
## 288 GO:0008595 anterior/posterior axis specification, e...        13
## 289 GO:0050746 regulation of lipoprotein metabolic proc...        13
## 290 GO:0045657 positive regulation of monocyte differen...         8
## 291 GO:0010873 positive regulation of cholesterol ester...         8
## 292 GO:0001561                  fatty acid alpha-oxidation         8
## 293 GO:0071073 positive regulation of phospholipid bios...         8
## 294 GO:1900364 negative regulation of mRNA polyadenylat...         8
## 295 GO:1900112 regulation of histone H3-K9 trimethylati...         8
## 296 GO:0008635 activation of cysteine-type endopeptidas...         8
## 297 GO:0007262            STAT protein import into nucleus         8
## 298 GO:0006390 transcription from mitochondrial promote...         8
## 299 GO:0015866                               ADP transport         8
## 300 GO:0046487                glyoxylate metabolic process         8
## 301 GO:0051573 negative regulation of histone H3-K9 met...         8
## 302 GO:0032926 negative regulation of activin receptor ...         8
## 303 GO:0032933                     SREBP signaling pathway         8
## 304 GO:0006591                 ornithine metabolic process         8
## 305 GO:1990314 cellular response to insulin-like growth...         8
## 306 GO:0031937 positive regulation of chromatin silenci...         8
## 307 GO:0090400         stress-induced premature senescence         8
## 308 GO:0051897 positive regulation of protein kinase B ...       137
##     Significant Expected weightFisher
## 1            13     1.87      1.8e-08
## 2            15     3.10      1.4e-07
## 3            11     1.91      6.7e-07
## 4             9     1.28      2.5e-06
## 5            10     1.32      3.2e-06
## 6             4     0.23      2.1e-05
## 7            17     5.47      3.0e-05
## 8             6     0.73      4.7e-05
## 9             5     0.50      7.1e-05
## 10           16     4.10      8.7e-05
## 11            5     0.55      0.00012
## 12            5     0.59      0.00018
## 13           10     2.60      0.00022
## 14            5     0.64      0.00027
## 15            5     0.68      0.00040
## 16           21     5.51      0.00041
## 17           12     3.60      0.00044
## 18           25    12.03      0.00044
## 19           41    12.08      0.00047
## 20            4     0.46      0.00072
## 21            4     0.46      0.00072
## 22            9     1.59      0.00074
## 23            5     0.77      0.00076
## 24            3     0.23      0.00088
## 25            3     0.23      0.00088
## 26            3     0.23      0.00088
## 27            3     0.23      0.00088
## 28           17     5.65      0.00091
## 29           21     7.66      0.00100
## 30           18     9.71      0.00103
## 31            4     0.50      0.00109
## 32            8     1.55      0.00155
## 33            3     0.27      0.00170
## 34            3     0.27      0.00170
## 35            3     0.27      0.00170
## 36            3     0.27      0.00170
## 37            6     1.37      0.00204
## 38            9     7.66      0.00208
## 39           67    46.03      0.00213
## 40            4     0.59      0.00220
## 41           16     5.92      0.00225
## 42            9     3.37      0.00227
## 43            7     1.87      0.00230
## 44            3     0.32      0.00287
## 45            4     1.82      0.00288
## 46            4     0.64      0.00297
## 47           12     7.06      0.00328
## 48            7     1.73      0.00336
## 49            4     0.68      0.00390
## 50            4     0.68      0.00390
## 51            5     1.09      0.00400
## 52            8     4.06      0.00442
## 53            4     1.05      0.00444
## 54            3     0.36      0.00444
## 55            3     0.36      0.00444
## 56            3     0.36      0.00444
## 57            3     0.36      0.00444
## 58           39    15.13      0.00523
## 59           15     9.25      0.00596
## 60            4     0.41      0.00600
## 61            4     0.77      0.00601
## 62            3     1.41      0.00604
## 63            5     1.00      0.00630
## 64            5     1.37      0.00631
## 65            5     1.50      0.00632
## 66          116    94.60      0.00636
## 67            3     0.41      0.00644
## 68            3     0.41      0.00644
## 69            3     0.41      0.00644
## 70            3     0.41      0.00644
## 71            3     0.41      0.00644
## 72            3     0.41      0.00644
## 73            3     0.41      0.00644
## 74            8     1.91      0.00772
## 75            4     0.82      0.00784
## 76           20     9.11      0.00840
## 77           38    14.17      0.00841
## 78            4     0.82      0.00887
## 79            3     0.46      0.00889
## 80            3     0.46      0.00889
## 81            3     0.46      0.00889
## 82            3     0.46      0.00889
## 83           28    16.22      0.00937
## 84           18     9.57      0.01152
## 85           14     7.47      0.01162
## 86            6     1.96      0.01162
## 87            5     1.23      0.01163
## 88            6     3.10      0.01166
## 89            3     1.00      0.01169
## 90            3     0.50      0.01181
## 91            3     0.50      0.01181
## 92            3     0.50      0.01181
## 93            3     0.50      0.01181
## 94            6     2.01      0.01226
## 95           13     9.11      0.01269
## 96            4     0.96      0.01377
## 97           10     4.28      0.01386
## 98            5     1.46      0.01404
## 99            6     2.01      0.01413
## 100          99    57.74      0.01441
## 101          36    15.36      0.01480
## 102          17     8.98      0.01488
## 103           5     1.14      0.01514
## 104           4     1.00      0.01519
## 105           3     0.55      0.01522
## 106           3     0.55      0.01522
## 107           3     0.55      0.01522
## 108           3     0.55      0.01522
## 109          11     2.73      0.01581
## 110          13     6.97      0.01599
## 111         165   138.63      0.01725
## 112          11     4.65      0.01770
## 113           5     1.55      0.01801
## 114          12     6.52      0.01863
## 115           8     6.15      0.01891
## 116           2     0.23      0.01892
## 117           2     0.23      0.01892
## 118           2     0.23      0.01892
## 119           2     0.23      0.01892
## 120           2     0.23      0.01892
## 121           2     0.23      0.01892
## 122           2     0.23      0.01892
## 123           2     0.23      0.01892
## 124           2     0.23      0.01892
## 125           2     0.23      0.01892
## 126           2     0.23      0.01892
## 127           2     0.23      0.01892
## 128           2     0.23      0.01892
## 129           2     0.23      0.01892
## 130           2     0.23      0.01892
## 131           2     0.23      0.01892
## 132           2     0.23      0.01892
## 133           2     0.23      0.01892
## 134           2     0.23      0.01892
## 135           2     0.23      0.01892
## 136           4     1.05      0.01896
## 137           9     8.16      0.01897
## 138           4     1.28      0.01911
## 139           3     0.59      0.01913
## 140           3     0.59      0.01913
## 141           3     0.59      0.01913
## 142           3     0.59      0.01913
## 143           8     3.42      0.02051
## 144          59    38.83      0.02063
## 145          27    15.54      0.02102
## 146           7     1.50      0.02168
## 147          11     4.65      0.02172
## 148           4     1.09      0.02196
## 149          17     9.98      0.02232
## 150          79    65.94      0.02293
## 151         172   125.23      0.02332
## 152           3     0.64      0.02354
## 153           3     0.64      0.02354
## 154           4     1.14      0.02522
## 155          81    61.61      0.02602
## 156           4     0.59      0.02739
## 157           7     3.87      0.02741
## 158           7     4.47      0.02746
## 159           3     0.64      0.02747
## 160           2     0.27      0.02752
## 161           2     0.27      0.02752
## 162           2     0.27      0.02752
## 163           2     0.27      0.02752
## 164           2     0.27      0.02752
## 165           2     0.27      0.02752
## 166           2     0.27      0.02752
## 167           2     0.27      0.02752
## 168           2     0.27      0.02752
## 169           2     0.27      0.02752
## 170           2     0.27      0.02752
## 171           2     0.27      0.02752
## 172           2     0.27      0.02752
## 173           2     0.27      0.02752
## 174           2     0.27      0.02752
## 175           2     0.27      0.02752
## 176           2     0.27      0.02752
## 177          13     8.48      0.02820
## 178           6     2.23      0.02829
## 179           5     1.41      0.02831
## 180          73    57.28      0.02845
## 181           3     0.68      0.02845
## 182           3     0.68      0.02845
## 183           6     2.05      0.02861
## 184           5     1.78      0.02871
## 185           4     1.18      0.02877
## 186           4     1.18      0.02877
## 187           7     3.01      0.03008
## 188         492   448.96      0.03079
## 189           8     3.69      0.03092
## 190           6     2.14      0.03242
## 191          51    41.88      0.03332
## 192           4     1.55      0.03384
## 193           3     0.73      0.03386
## 194           3     0.73      0.03386
## 195           3     0.73      0.03386
## 196           5     1.82      0.03412
## 197          45    33.13      0.03431
## 198          39    24.74      0.03565
## 199           7     2.19      0.03633
## 200          11     8.48      0.03673
## 201           7     2.10      0.03705
## 202           7     2.92      0.03713
## 203           4     1.82      0.03733
## 204           3     0.91      0.03734
## 205           3     1.00      0.03735
## 206           2     0.32      0.03738
## 207           2     0.32      0.03738
## 208           2     0.32      0.03738
## 209           2     0.32      0.03738
## 210           2     0.32      0.03738
## 211           2     0.32      0.03738
## 212           2     0.32      0.03738
## 213           2     0.32      0.03738
## 214           2     0.32      0.03738
## 215           2     0.32      0.03738
## 216           2     0.32      0.03738
## 217           2     0.32      0.03738
## 218           2     0.32      0.03738
## 219           2     0.32      0.03738
## 220           2     0.32      0.03738
## 221           2     0.32      0.03738
## 222           2     0.32      0.03738
## 223           2     0.32      0.03738
## 224           2     0.32      0.03738
## 225           2     0.32      0.03738
## 226           2     0.32      0.03738
## 227           2     0.32      0.03738
## 228           2     0.32      0.03738
## 229           2     0.32      0.03738
## 230           2     0.32      0.03738
## 231           2     0.32      0.03738
## 232           2     0.32      0.03738
## 233           8     3.01      0.03791
## 234          26    16.50      0.03864
## 235          10     2.87      0.03902
## 236           7     1.09      0.03920
## 237           8     3.92      0.03948
## 238           7     3.19      0.03971
## 239           3     0.77      0.03976
## 240           3     0.77      0.03976
## 241          10     3.37      0.04019
## 242          15     8.02      0.04037
## 243           6     1.82      0.04080
## 244           5     1.91      0.04099
## 245           5     1.91      0.04099
## 246           4     1.32      0.04109
## 247          19    15.63      0.04129
## 248          85    59.20      0.04369
## 249          12     7.47      0.04431
## 250           8     3.96      0.04448
## 251          14     1.91      0.04480
## 252          32    17.00      0.04486
## 253          31    25.98      0.04523
## 254           8     3.05      0.04527
## 255           6     1.55      0.04533
## 256           6     2.73      0.04541
## 257           3     0.46      0.04546
## 258           3     0.50      0.04546
## 259           3     0.55      0.04547
## 260           3     1.09      0.04550
## 261           3     1.09      0.04550
## 262           6     4.19      0.04551
## 263           7     5.20      0.04551
## 264           2     0.27      0.04552
## 265           2     0.27      0.04552
## 266           2     0.27      0.04552
## 267           2     0.27      0.04552
## 268           2     0.27      0.04552
## 269           2     0.27      0.04552
## 270           2     0.32      0.04552
## 271           2     0.32      0.04552
## 272           2     0.46      0.04553
## 273           2     0.59      0.04554
## 274           2     0.96      0.04556
## 275           4     3.01      0.04557
## 276           7     1.50      0.04557
## 277           2     1.28      0.04559
## 278           9     5.65      0.04561
## 279           6     6.61      0.04568
## 280           4     1.37      0.04576
## 281           4     1.37      0.04607
## 282           3     0.82      0.04615
## 283           3     0.82      0.04615
## 284           3     0.82      0.04615
## 285          13     6.24      0.04712
## 286           6     2.64      0.04773
## 287          19    13.81      0.04788
## 288           3     0.59      0.04826
## 289           3     0.59      0.04826
## 290           2     0.36      0.04837
## 291           2     0.36      0.04837
## 292           2     0.36      0.04837
## 293           2     0.36      0.04837
## 294           2     0.36      0.04837
## 295           2     0.36      0.04837
## 296           2     0.36      0.04837
## 297           2     0.36      0.04837
## 298           2     0.36      0.04837
## 299           2     0.36      0.04837
## 300           2     0.36      0.04837
## 301           2     0.36      0.04837
## 302           2     0.36      0.04837
## 303           2     0.36      0.04837
## 304           2     0.36      0.04837
## 305           2     0.36      0.04837
## 306           2     0.36      0.04837
## 307           2     0.36      0.04837
## 308          11     6.24      0.04848
 # Keep only the GO identifier and the weightFisher value of each row of the
 # current go_table, then write them out as an unquoted text file without
 # row or column names.
 liver_go_columns <- cbind(go_table$GO.ID, go_table$weightFisher)
 go_table_liver <- as.data.frame(liver_go_columns)

write.table(
  go_table_liver,
  file = "../data/go_table_liver.txt",
  quote = FALSE,
  row.names = FALSE,
  col.names = FALSE
)

Lung

# Select the tDMR rows to analyse in this (lung) section: rows whose V4 label
# is "hyper" and whose V6 is not "-1". The result keeps the name heart_hypo
# because the closest-gene step below reads it.
# NOTE(review): per the note at the top of this file, tDMRs labelled "hyper"
# are actually hypomethylated in lung/kidney -- confirm this is the intended set.
lung_tdmrs <- human_chimp_heart_specific_25_conserved_H3K27_LUNG
is_selected <- lung_tdmrs$V4 == "hyper" & lung_tdmrs$V6 != "-1"
heart_hypo <- lung_tdmrs[which(is_selected), ]

# Discard rows that contain a missing value in any column.
heart_hypo <- heart_hypo[complete.cases(heart_hypo), ]

# Find the closest gene: run `bedtools closest` (through bedr) with the first
# three columns of heart_hypo as input "a" and refGene_hg19_TSS as input "b".
closest_heart <- bedr(
  input = list(a = heart_hypo[, 1:3], b = refGene_hg19_TSS),
  method = "closest",
  check.chr = FALSE
)
##  * Processing input (1): a
## CONVERT TO BED
##  * Checking input type... PASS
##    Input seems to be in bed format but chr/start/end column names are missing
## VALIDATE REGIONS
##  * Check if index is a string... PASS
##  * Check index pattern... PASS
##  * Check for missing values... PASS
##  * Check for larger start position... PASS.
##  * Check if zero based... PASS
##  * Checking sort order... PASS
##  * Checking for overlapping 'contiguous' regions... PASS
##  * Processing input (2): b
## CONVERT TO BED
##  * Checking input type... PASS
##    Input seems to be in bed format but chr/start/end column names are missing
## VALIDATE REGIONS
##  * Check if index is a string... PASS
##  * Check index pattern... PASS
##  * Check for missing values... PASS
##  * Check for larger start position... PASS.
##  * Check if zero based... PASS
##  * Checking sort order... FAIL
##    The input for object is not *lexographically* ordered!
##    This can cause unexpected results for some set operations.
##    try: x <- bedr.sort.region(x)
##  * Checking for overlapping 'contiguous' regions... FAIL
##    The input for object has overlapping features!
##    This can cause unexpected results for some set operations.
##    i.e. x <- bedr.merge.region(x)
##    bedtools closest -a /var/folders/rf/qrcw6ncj05z1pc_pq9xzw3540000gn/T//RtmpXK3I56/a_bc552850ec24.bed -b /var/folders/rf/qrcw6ncj05z1pc_pq9xzw3540000gn/T//RtmpXK3I56/b_bc557772e5c1.bed
# Convert the gene name to ensg: load the comma-separated lookup table and
# join it to the closest-gene result (V8 there is matched against Gene).
gene_id <- read.table(
  "../../../Reg_Evo_Primates/data/ENSG_GENE_HG19.csv",
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE
)

comb_kidney <- merge(closest_heart, gene_id, by.x = "V8", by.y = "Gene")

# Print the ENSG identifiers of the matched genes.
comb_kidney$ensg
##  [1] "ENSG00000177674" "ENSG00000177674" "ENSG00000177674"
##  [4] "ENSG00000177674" "ENSG00000177674" "ENSG00000130762"
##  [7] "ENSG00000117411" "ENSG00000026508" "ENSG00000026508"
## [10] "ENSG00000026508" "ENSG00000026508" "ENSG00000026508"
## [13] "ENSG00000026508" "ENSG00000026508" "ENSG00000026508"
## [16] "ENSG00000114861" "ENSG00000114315" "ENSG00000131149"
## [19] "ENSG00000198951" "ENSG00000196498" "ENSG00000196498"
## [22] "ENSG00000196498" "ENSG00000170322" "ENSG00000100100"
## [25] "ENSG00000100100" "ENSG00000185917" "ENSG00000185917"
## [28] "ENSG00000167323" "ENSG00000143643" "ENSG00000143643"
## [31] "ENSG00000153786" "ENSG00000153786" "ENSG00000171940"
## [34] "ENSG00000183621" "ENSG00000183621" "ENSG00000183621"
## [37] "ENSG00000183621" "ENSG00000183621" "ENSG00000183621"
## [40] "ENSG00000183621"
# Convert the TSS to ensg as well: join every TSS record to the lookup table
# (V5 is matched against Gene) to obtain the background set of ENSG IDs.
ref_gene_hg19_ensg <- merge(
  refGene_hg19_TSS,
  gene_id,
  by.x = "V5",
  by.y = "Gene"
)

# One entry per distinct background gene.
all_ref_gene_hg19_ensg <- unique(ref_gene_hg19_ensg$ensg)

# TRUE for background genes that also appear in comb_kidney.
heart_ref_gene <- all_ref_gene_hg19_ensg %in% comb_kidney$ensg

# Revisions- run GO
# Build the named gene vector for topGO: one entry per background ENSG ID,
# 1 where heart_ref_gene is TRUE and 0 where it is FALSE.
test_gene <- setNames(
  as.numeric(as.vector(heart_ref_gene)),
  all_ref_gene_hg19_ensg
)

# Run topGO: build the topGOdata object for the "BP" ontology from test_gene.
# Genes whose score is above 0 are treated as the selected set; annotation
# comes from org.Hs.eg.db using Ensembl identifiers.
go_data <- new(
  "topGOdata",
  ontology = "BP",
  allGenes = test_gene,
  geneSel = function(allScore) {
    allScore > 0
  },
  nodeSize = 5,
  annotationFun = annFUN.org,
  mapping = "org.Hs.eg.db",
  ID = "ensembl"
)
## 
## Building most specific GOs .....
##  ( 11450 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 15456 GO terms and 36153 relations. )
## 
## Annotating nodes ...............
##  ( 14505 genes annotated to the GO terms. )
# Perform enrichment test: Fisher statistic combined with the weight01
# algorithm on the topGOdata object built above.
go_test <- runTest(
  go_data,
  algorithm = "weight01",
  statistic = "fisher"
)
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 1110 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 15:  3 nodes to be scored    (0 eliminated genes)
## 
##   Level 14:  5 nodes to be scored    (0 eliminated genes)
## 
##   Level 13:  23 nodes to be scored   (86 eliminated genes)
## 
##   Level 12:  44 nodes to be scored   (192 eliminated genes)
## 
##   Level 11:  76 nodes to be scored   (1997 eliminated genes)
## 
##   Level 10:  103 nodes to be scored  (3134 eliminated genes)
## 
##   Level 9:   112 nodes to be scored  (5198 eliminated genes)
## 
##   Level 8:   124 nodes to be scored  (6736 eliminated genes)
## 
##   Level 7:   155 nodes to be scored  (7719 eliminated genes)
## 
##   Level 6:   166 nodes to be scored  (10145 eliminated genes)
## 
##   Level 5:   144 nodes to be scored  (11508 eliminated genes)
## 
##   Level 4:   90 nodes to be scored   (13129 eliminated genes)
## 
##   Level 3:   48 nodes to be scored   (13788 eliminated genes)
## 
##   Level 2:   16 nodes to be scored   (14144 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (14277 eliminated genes)
# Tabulate the top GO terms ordered and ranked by weightFisher; the number of
# rows equals the number of terms whose score is below 0.01.
n_top_terms <- sum(score(go_test) < .01)

go_table <- GenTable(
  go_data,
  weightFisher = go_test,
  orderBy = "weightFisher",
  ranksOf = "weightFisher",
  topNodes = n_top_terms
)

# Print the resulting table.
go_table
##         GO.ID                                        Term Annotated
## 1  GO:0035019    somatic stem cell population maintenance        61
## 2  GO:0061009                common bile duct development         5
## 3  GO:0032237 activation of store-operated calcium cha...         5
## 4  GO:0072049             comma-shaped body morphogenesis         5
## 5  GO:0003310           pancreatic A cell differentiation         5
## 6  GO:0009912      auditory receptor cell fate commitment         6
## 7  GO:1901533 negative regulation of hematopoietic pro...         6
## 8  GO:2001182      regulation of interleukin-12 secretion         6
## 9  GO:2000973    regulation of pro-B cell differentiation         6
## 10 GO:0021861 forebrain radial glial cell differentiat...         6
## 11 GO:0018230        peptidyl-L-cysteine S-palmitoylation         6
## 12 GO:1903039 positive regulation of leukocyte cell-ce...       195
## 13 GO:0046477          glycosylceramide catabolic process         7
## 14 GO:1903799 negative regulation of production of miR...         7
## 15 GO:0061101         neuroendocrine cell differentiation         7
## 16 GO:0072050                 S-shaped body morphogenesis         7
## 17 GO:0061626        pharyngeal arch artery morphogenesis         7
## 18 GO:0042117                         monocyte activation         7
## 19 GO:0048664                   neuron fate determination         7
## 20 GO:0045607 regulation of auditory receptor cell dif...         8
## 21 GO:0072282    metanephric nephron tubule morphogenesis         8
## 22 GO:0097084     vascular smooth muscle cell development         8
## 23 GO:0007262            STAT protein import into nucleus         8
## 24 GO:0016139                 glycoside catabolic process         8
## 25 GO:1903726 negative regulation of phospholipid meta...         8
## 26 GO:0061309 cardiac neural crest cell development in...         9
## 27 GO:0018026             peptidyl-lysine monomethylation         9
## 28 GO:0003266 regulation of secondary heart field card...         9
## 29 GO:0030917     midbrain-hindbrain boundary development         9
## 30 GO:0070486                       leukocyte aggregation         9
## 31 GO:0072567 chemokine (C-X-C motif) ligand 2 product...         9
##    Significant Expected weightFisher
## 1            2     0.06       0.0018
## 2            1     0.01       0.0052
## 3            1     0.01       0.0052
## 4            1     0.01       0.0052
## 5            1     0.01       0.0052
## 6            1     0.01       0.0062
## 7            1     0.01       0.0062
## 8            1     0.01       0.0062
## 9            1     0.01       0.0062
## 10           1     0.01       0.0062
## 11           1     0.01       0.0062
## 12           2     0.20       0.0068
## 13           1     0.01       0.0072
## 14           1     0.01       0.0072
## 15           1     0.01       0.0072
## 16           1     0.01       0.0072
## 17           1     0.01       0.0072
## 18           1     0.01       0.0072
## 19           1     0.01       0.0072
## 20           1     0.01       0.0082
## 21           1     0.01       0.0082
## 22           1     0.01       0.0082
## 23           1     0.01       0.0082
## 24           1     0.01       0.0082
## 25           1     0.01       0.0082
## 26           1     0.01       0.0093
## 27           1     0.01       0.0093
## 28           1     0.01       0.0093
## 29           1     0.01       0.0093
## 30           1     0.01       0.0093
## 31           1     0.01       0.0093

Run clusterProfiler

#head(heart_ref_gene)
#length(heart_ref_gene)
#head(liver_ref_gene)
#length(liver_ref_gene)

#mydf <- as.data.frame(cbind(all_ref_gene_hg19_ensg, heart_ref_gene, liver_ref_gene))

#colnames(mydf) <- c("ensg", "heart", "liver")

#xx.formula.twogroups <- compareCluster(mydf$heart, data = mydf, fun="enrichGO", universe = mydf$ensg,
#                OrgDb         = org.Hs.eg.db)

#dotplot(xx.formula.twogroups, showCategory=5, includeAll=FALSE)
## Revigo

# If you don't have the ggplot2 package installed, uncomment the following line:
# install.packages( "ggplot2" );
library( ggplot2 );
# --------------------------------------------------------------------------
# If you don't have the scales package installed, uncomment the following line:
# install.packages( "scales" );
library( scales );
## Warning: package 'scales' was built under R version 3.4.4
library(ggrepel)

# --------------------------------------------------------------------------
# Here is your data from REVIGO. Scroll down for plot configuration options.

# Column names for the REVIGO table, in the same order as the fields of each
# row of revigo.data; they are assigned to one.data once it has been created.
revigo.names <- c("term_ID","description","frequency_%","plot_X","plot_Y","plot_size","value","uniqueness","dispensability");
# One row per GO term, with fields in the order given by revigo.names. Each
# c() mixes strings and numbers, so rbind() produces a character matrix; the
# numeric columns are converted with as.numeric() after the data frame is built.
revigo.data <- rbind(c("GO:0009113","purine nucleobase biosynthetic process", 0.124,-4.742,-3.192, 4.202, 0.0005,0.769,0.000),
c("GO:0033197","response to vitamin E", 0.000, 7.179,-3.273, 1.740, 0.0005,0.849,0.000),
c("GO:0019043","establishment of viral latency", 0.001,-5.135, 5.016, 1.959, 0.0187,0.945,0.018),
c("GO:0006501","C-terminal protein lipidation", 0.004, 0.003,-5.207, 2.693, 0.0120,0.913,0.100),
c("GO:0060509","Type I pneumocyte differentiation", 0.001,-0.420, 6.928, 2.121, 0.0134,0.698,0.103),
c("GO:1903078","positive regulation of protein localization to plasma membrane", 0.010, 1.621, 0.720, 3.123, 0.0068,0.688,0.118),
c("GO:0033632","regulation of cell-cell adhesion mediated by integrin", 0.002, 2.909, 0.449, 2.378, 0.0160,0.835,0.132),
c("GO:2000109","regulation of macrophage apoptotic process", 0.001, 2.079,-2.113, 2.188, 0.0187,0.768,0.132),
c("GO:0032007","negative regulation of TOR signaling", 0.016, 5.974, 0.117, 3.325, 0.0052,0.640,0.175),
c("GO:0043547","positive regulation of GTPase activity", 0.470, 4.687, 1.488, 4.780, 0.0054,0.838,0.179),
c("GO:0009256","10-formyltetrahydrofolate metabolic process", 0.002,-6.344,-1.530, 2.474, 0.0134,0.809,0.185),
c("GO:0070141","response to UV-A", 0.002, 6.586,-3.433, 2.369, 0.0160,0.883,0.188),
c("GO:0071499","cellular response to laminar fluid shear stress", 0.001, 6.586,-2.544, 2.143, 0.0187,0.852,0.211),
c("GO:0006139","nucleobase-containing compound metabolic process",26.547,-4.434,-2.184, 6.532, 0.0499,0.889,0.214),
c("GO:0018242","protein O-linked glycosylation via serine", 0.001,-1.029,-5.988, 2.117, 0.0160,0.818,0.218),
c("GO:0015942","formate metabolic process", 0.065,-5.564,-1.813, 3.921, 0.0160,0.815,0.246),
c("GO:0070601","centromeric sister chromatid cohesion", 0.004,-1.575,-1.033, 2.749, 0.0266,0.829,0.253),
c("GO:0006863","purine nucleobase transport", 0.002,-4.632, 3.161, 2.464, 0.0160,0.877,0.268),
c("GO:0032026","response to magnesium ion", 0.002, 6.932,-3.864, 2.389, 0.0473,0.863,0.290),
c("GO:0070375","ERK5 cascade", 0.001, 3.865,-3.823, 2.041, 0.0134,0.729,0.303),
c("GO:0060070","canonical Wnt signaling pathway", 0.061, 4.991,-1.442, 3.893, 0.0066,0.678,0.315),
c("GO:0032486","Rap protein signal transduction", 0.002, 6.347,-0.924, 2.458, 0.0344,0.763,0.318),
c("GO:0061430","bone trabecula morphogenesis", 0.003,-2.302, 6.815, 2.553, 0.0344,0.798,0.326),
c("GO:0001957","intramembranous ossification", 0.001, 0.332, 7.419, 2.167, 0.0160,0.766,0.343),
c("GO:0009120","deoxyribonucleoside metabolic process", 0.006,-4.177,-4.617, 2.908, 0.0187,0.800,0.350),
c("GO:0007386","compartment pattern specification", 0.001, 0.048, 7.245, 2.111, 0.0134,0.749,0.356),
c("GO:0045351","type I interferon biosynthetic process", 0.002, 0.347, 6.406, 2.505, 0.0318,0.743,0.357),
c("GO:0001821","histamine secretion", 0.001,-3.314, 2.812, 2.143, 0.0240,0.877,0.358),
c("GO:0042321","negative regulation of circadian sleep/wake cycle, sleep", 0.001, 2.446, 5.266, 1.845, 0.0134,0.663,0.369),
c("GO:0006744","ubiquinone biosynthetic process", 0.136,-5.223,-2.418, 4.242, 0.0396,0.786,0.389),
c("GO:0009146","purine nucleoside triphosphate catabolic process", 0.035,-3.242,-4.729, 3.647, 0.0134,0.749,0.391),
c("GO:0046085","adenosine metabolic process", 0.003,-4.584,-4.405, 2.524, 0.0266,0.804,0.409),
c("GO:0060405","regulation of penile erection", 0.000, 1.798, 6.090, 1.716, 0.0160,0.701,0.410),
c("GO:0045725","positive regulation of glycogen biosynthetic process", 0.003, 0.849,-3.136, 2.547, 0.0396,0.723,0.414),
c("GO:1901642","nucleoside transmembrane transport", 0.047,-3.483, 2.842, 3.781, 0.0187,0.860,0.432),
c("GO:0048702","embryonic neurocranium morphogenesis", 0.002, 0.142, 6.947, 2.444, 0.0187,0.711,0.433),
c("GO:0048311","mitochondrion distribution", 0.025,-3.646, 1.645, 3.502, 0.0318,0.901,0.436),
c("GO:0046060","dATP metabolic process", 0.001,-3.497,-5.142, 2.207, 0.0134,0.789,0.437),
c("GO:1903071","positive regulation of ER-associated ubiquitin-dependent protein catabolic process", 0.001, 4.542,-2.446, 2.137, 0.0187,0.618,0.440),
c("GO:0033327","Leydig cell differentiation", 0.002,-0.252, 6.634, 2.407, 0.0292,0.686,0.450),
c("GO:0048541","Peyer's patch development", 0.002, 1.607, 6.672, 2.316, 0.0134,0.674,0.470),
c("GO:0060368","regulation of Fc receptor mediated stimulatory signaling pathway", 0.001, 6.210, 1.102, 1.919, 0.0160,0.646,0.480),
c("GO:0006351","transcription, DNA-templated",10.659,-3.806,-3.709, 6.136, 0.0041,0.852,0.480),
c("GO:0072530","purine-containing compound transmembrane transport", 0.016,-4.166, 2.833, 3.304, 0.0213,0.934,0.483),
c("GO:0072531","pyrimidine-containing compound transmembrane transport", 0.019,-4.073, 2.881, 3.391, 0.0422,0.934,0.489),
c("GO:0070255","regulation of mucus secretion", 0.002, 1.155, 4.966, 2.504, 0.0240,0.662,0.494),
c("GO:0038180","nerve growth factor signaling pathway", 0.001, 6.875,-1.582, 2.190, 0.0213,0.729,0.496),
c("GO:0009168","purine ribonucleoside monophosphate biosynthetic process", 1.043,-3.650,-3.225, 5.127, 0.0136,0.734,0.509),
c("GO:0033601","positive regulation of mammary gland epithelial cell proliferation", 0.002, 2.665, 4.905, 2.398, 0.0187,0.604,0.519),
c("GO:0000122","negative regulation of transcription from RNA polymerase II promoter", 0.199, 1.681,-2.816, 4.408, 0.0439,0.725,0.526),
c("GO:0001973","adenosine receptor signaling pathway", 0.005, 7.122,-0.404, 2.792, 0.0292,0.738,0.536),
c("GO:0038063","collagen-activated tyrosine kinase receptor signaling pathway", 0.005, 6.816,-0.631, 2.810, 0.0213,0.742,0.537),
c("GO:0008354","germ cell migration", 0.004,-0.730, 5.666, 2.729, 0.0213,0.700,0.552),
c("GO:0071320","cellular response to cAMP", 0.007, 7.142,-2.642, 2.937, 0.0071,0.810,0.553),
c("GO:0008277","regulation of G-protein coupled receptor protein signaling pathway", 0.024, 6.792, 0.170, 3.494, 0.0209,0.691,0.554),
c("GO:2000178","negative regulation of neural precursor cell proliferation", 0.005, 4.025, 1.444, 2.838, 0.0499,0.775,0.556),
c("GO:0071361","cellular response to ethanol", 0.003, 7.311,-2.794, 2.550, 0.0344,0.820,0.562),
c("GO:0043650","dicarboxylic acid biosynthetic process", 0.530,-5.261,-1.661, 4.833, 0.0344,0.767,0.571),
c("GO:0051412","response to corticosterone", 0.001, 6.485,-3.908, 2.053, 0.0422,0.848,0.572),
c("GO:0034127","regulation of MyD88-independent toll-like receptor signaling pathway", 0.000, 6.487, 1.509, 1.531, 0.0213,0.663,0.590),
c("GO:0002313","mature B cell differentiation involved in immune response", 0.003, 3.280, 4.691, 2.642, 0.0234,0.567,0.592),
c("GO:0032261","purine nucleotide salvage", 0.059,-4.165,-4.110, 3.878, 0.0187,0.768,0.593),
c("GO:0042118","endothelial cell activation", 0.002,-3.033, 0.342, 2.367, 0.0266,0.837,0.600),
c("GO:0003139","secondary heart field specification", 0.002, 0.004, 7.063, 2.358, 0.0240,0.711,0.600),
c("GO:0048387","negative regulation of retinoic acid receptor signaling pathway", 0.002, 6.096, 0.051, 2.394, 0.0213,0.681,0.601),
c("GO:0043249","erythrocyte maturation", 0.002, 2.358, 5.650, 2.461, 0.0292,0.610,0.607),
c("GO:0046130","purine ribonucleoside catabolic process", 0.036,-3.056,-4.878, 3.660, 0.0187,0.745,0.609),
c("GO:0001829","trophectodermal cell differentiation", 0.003,-0.657, 6.646, 2.521, 0.0396,0.689,0.620),
c("GO:0055015","ventricular cardiac muscle cell development", 0.003,-0.775, 6.758, 2.553, 0.0318,0.678,0.622),
c("GO:0002819","regulation of adaptive immune response", 0.025, 6.335, 1.438, 3.513, 0.0184,0.673,0.642),
c("GO:0051446","positive regulation of meiotic cell cycle", 0.007, 3.995, 2.576, 2.925, 0.0422,0.712,0.643),
c("GO:0014807","regulation of somitogenesis", 0.002, 1.564, 6.051, 2.342, 0.0213,0.651,0.660),
c("GO:0035414","negative regulation of catenin import into nucleus", 0.002, 1.162, 1.277, 2.497, 0.0266,0.750,0.669),
c("GO:0018243","protein O-linked glycosylation via threonine", 0.001,-1.054,-6.012, 2.201, 0.0187,0.817,0.676),
c("GO:0046654","tetrahydrofolate biosynthetic process", 0.101,-5.448,-2.197, 4.113, 0.0187,0.765,0.680),
c("GO:0060340","positive regulation of type I interferon-mediated signaling pathway", 0.002, 6.123, 0.385, 2.412, 0.0213,0.608,0.680),
c("GO:0046386","deoxyribose phosphate catabolic process", 0.023,-1.891,-6.345, 3.469, 0.0448,0.843,0.684),
c("GO:2000059","negative regulation of protein ubiquitination involved in ubiquitin-dependent protein catabolic process", 0.003, 2.146,-4.195, 2.603, 0.0318,0.743,0.686),
c("GO:0036066","protein O-linked fucosylation", 0.002,-0.988,-6.193, 2.307, 0.0370,0.815,0.689),
c("GO:0039530","MDA-5 signaling pathway", 0.002, 4.143, 3.197, 2.346, 0.0240,0.527,0.698),
c("GO:0071157","negative regulation of cell cycle arrest", 0.005, 3.360,-0.231, 2.804, 0.0473,0.687,0.699),
c("GO:0035023","regulation of Rho protein signal transduction", 0.125, 6.365, 0.101, 4.206, 0.0400,0.660,0.699));

# Reusable ggplot2 theme: bold 14-point text for axes, legends and facet
# strips, a bold 16-point plot title, and black size-2 frames around the
# panel and the strip background.
bold_text_14 <- element_text(size = 14, face = "bold")
bold_black_text_14 <- element_text(size = 14, face = "bold", color = "black")

bjp <- theme(
  panel.border = element_rect(colour = "black", fill = NA, size = 2),
  plot.title = element_text(size = 16, face = "bold"),
  axis.text.y = bold_black_text_14,
  axis.text.x = bold_black_text_14,
  axis.title.y = bold_text_14,
  axis.title.x = bold_text_14,
  legend.text = bold_text_14,
  legend.title = bold_text_14,
  strip.text.x = bold_text_14,
  strip.text.y = bold_text_14,
  strip.background = element_rect(colour = "black", size = 2)
)

# Turn the REVIGO character matrix into a data frame and convert the columns
# used for plotting to numeric.
one.data <- data.frame(revigo.data)
names(one.data) <- revigo.names

# Drop terms whose plot coordinates carry the "null" marker.
has_coordinates <- one.data$plot_X != "null" & one.data$plot_Y != "null"
one.data <- one.data[has_coordinates, ]

# Columns converted in place (via as.character so that factor columns yield
# their printed values rather than internal level codes).
in_place_columns <- c("plot_X", "plot_Y", "plot_size",
                      "uniqueness", "dispensability")
one.data[in_place_columns] <- lapply(
  one.data[in_place_columns],
  function(column) as.numeric(as.character(column))
)

# Numeric copy of the "value" column. No log transform is applied here even
# though the column is called log10_p_value; the plot code refers to this name.
one.data$log10_p_value <- as.numeric(as.character(one.data$value))

# The source column is named "frequency_%". Read it by its exact name instead
# of relying on `$` partial matching (`one.data$frequency`), which is fragile
# and warns when options(warnPartialMatchDollar = TRUE) is set.
one.data$frequency <- as.numeric(as.character(one.data[["frequency_%"]]))
# --------------------------------------------------------------------------


# Table of the terms whose labels are drawn on the plot. All data are plotted
# as points; only the rows collected in `ex` get a text label, so maintain
# this table by hand.
label1 <- c("GO:0070375", "ERK5 cascade",
            0.001, 3.865, -3.823, 2.041, 0.0134, 0.729, 0.303)

label2 <- c("GO:0060070", "canonical Wnt signaling pathway",
            0.061, 4.991, -1.442, 3.893, 0.0066, 0.678, 0.315)

label3 <- c("GO:0055015", "ventricular cardiac muscle cell development",
            0.003, -0.775, 6.758, 2.553, 0.0318, 0.678, 0.622)

label4 <- c("GO:0003139", "secondary heart field specification",
            0.002, 0.004, 7.063, 2.358, 0.0240, 0.711, 0.600)

# NOTE(review): label4 is defined but not included in `ex`, so it is never
# drawn -- confirm whether that omission is intentional.
label_rows <- rbind(label1, label2, label3)
ex <- as.data.frame(label_rows, stringsAsFactors = FALSE)
names(ex) <- c("term_ID", "description", "frequency_%", "plot_X", "plot_Y",
               "plot_size", "value", "uniqueness", "dispensability")

# REVIGO scatter plot: one point per GO term at (plot_X, plot_Y), filled by
# the log10_p_value column and sized by plot_size; the terms listed in `ex`
# additionally get a text label.
ggplot(data = one.data) +
  geom_point(
    aes(plot_X, plot_Y, fill = log10_p_value, size = plot_size),
    shape = 21, color = "black", stroke = 0.7, alpha = 0.8
  ) +
  scale_fill_gradient2(
    low = muted("red"), mid = "white", high = muted("blue")
  ) +
  # Legend titles (the size title used to be set twice; once is enough).
  guides(
    fill = guide_legend(title = "P value"),
    size = guide_legend(title = "No. of genes")
  ) +
  # Optional: uncomment to control the point-size range.
  # scale_size(range = c(5, 20)) +
  # plot_X is mapped to the x axis and plot_Y to the y axis, so the axis
  # titles must follow the same order (they were swapped previously).
  labs(x = "Semantic space x", y = "Semantic space y") +
  theme_bw() +
  geom_label_repel(
    # Pass the label table explicitly as `data` and refer to its columns by
    # name inside aes() instead of using `ex$...`; the columns of `ex` are
    # character, hence the as.numeric() conversion.
    data = ex,
    mapping = aes(as.numeric(plot_X), as.numeric(plot_Y), label = description),
    fontface = "bold", color = "black",
    box.padding = 0.6, point.padding = 1.6,
    size = 3,
    # Width of the line segments.
    segment.size = 0,
    # Strength of the repulsion force.
    force = 1
  )

## Revigo liver


# REVIGO table for the liver section.
# revigo.names lists the column names; each row of revigo.data below holds one
# GO term with its fields in that same order.
# Every row mixes strings and numbers inside c(), so all fields are coerced to
# text and rbind() yields a character matrix; numeric columns have to be
# converted back before plotting.
# NOTE(review): presumably pasted from a REVIGO export; confirm the source
# before editing rows by hand.
revigo.names <- c("term_ID","description","frequency_%","plot_X","plot_Y","plot_size","value","uniqueness","dispensability");
revigo.data <- rbind(c("GO:0002576","platelet degranulation", 0.003, 0.496,-3.539, 2.519, 0.0000,0.842,0.000),
c("GO:0008152","metabolic process",75.387, 0.813,-0.564, 6.986, 0.0308,0.998,0.000),
c("GO:0009820","alkaloid metabolic process", 0.006, 4.043, 1.159, 2.869, 0.0000,0.947,0.000),
c("GO:0042632","cholesterol homeostasis", 0.019,-3.975, 2.639, 3.381, 0.0000,0.818,0.000),
c("GO:0019835","cytolysis", 0.044, 0.583,-0.452, 3.749, 0.0007,0.971,0.018),
c("GO:0001887","selenium compound metabolic process", 0.008, 0.543,-0.357, 2.990, 0.0189,0.963,0.019),
c("GO:0042157","lipoprotein metabolic process", 0.210, 0.683,-0.584, 4.431, 0.0116,0.957,0.028),
c("GO:0017144","drug metabolic process", 0.058, 0.564,-0.386, 3.868, 0.0392,0.959,0.035),
c("GO:0070989","oxidative demethylation", 0.011, 1.605, 0.368, 3.133, 0.0064,0.912,0.041),
c("GO:0019740","nitrogen utilization", 0.085, 0.842,-0.775, 4.038, 0.0189,0.932,0.050),
c("GO:0019695","choline metabolic process", 0.016, 0.978, 0.110, 3.301, 0.0060,0.962,0.063),
c("GO:0048625","myoblast fate commitment", 0.001, 0.704,-6.082, 2.100, 0.0189,0.822,0.085),
c("GO:0006368","transcription elongation from RNA polymerase II promoter", 0.082, 1.989, 1.876, 4.021, 0.0139,0.890,0.085),
c("GO:0051081","nuclear envelope disassembly", 0.002, 0.688,-0.443, 2.338, 0.0116,0.883,0.087),
c("GO:0006957","complement activation, alternative pathway", 0.000,-6.310, 2.907, 1.771, 0.0003,0.689,0.121),
c("GO:0017187","peptidyl-glutamic acid carboxylation", 0.006,-0.167, 1.374, 2.865, 0.0011,0.918,0.125),
c("GO:0006629","lipid metabolic process", 3.522, 3.786, 1.634, 5.655, 0.0144,0.876,0.126),
c("GO:0034384","high-density lipoprotein particle clearance", 0.002,-3.947,-4.331, 2.294, 0.0001,0.733,0.128),
c("GO:1904683","regulation of metalloendopeptidase activity", 0.001,-3.073, 3.701, 2.210, 0.0017,0.790,0.131),
c("GO:0006641","triglyceride metabolic process", 0.038, 3.327, 1.871, 3.687, 0.0001,0.832,0.140),
c("GO:0051647","nucleus localization", 0.012,-0.140,-3.521, 3.198, 0.0117,0.927,0.141),
c("GO:0042738","exogenous drug catabolic process", 0.001,-1.097, 3.996, 2.193, 0.0009,0.834,0.156),
c("GO:0070141","response to UV-A", 0.002,-3.392, 1.485, 2.369, 0.0017,0.918,0.166),
c("GO:0006768","biotin metabolic process", 0.081, 5.718, 3.455, 4.018, 0.0007,0.805,0.167),
c("GO:1901160","primary amino compound metabolic process", 0.015, 4.703, 1.279, 3.290, 0.0455,0.944,0.178),
c("GO:0043627","response to estrogen", 0.010,-3.637, 1.577, 3.128, 0.0379,0.912,0.184),
c("GO:0099133","ATP hydrolysis coupled anion transmembrane transport", 0.271,-0.029,-4.342, 4.541, 0.0152,0.919,0.184),
c("GO:0010646","regulation of cell communication", 0.929,-6.450, 3.995, 5.076, 0.0173,0.784,0.190),
c("GO:0007041","lysosomal transport", 0.017, 0.052,-3.377, 3.341, 0.0395,0.947,0.196),
c("GO:0042158","lipoprotein biosynthetic process", 0.192, 0.957, 0.864, 4.391, 0.0189,0.919,0.199),
c("GO:1990535","neuron projection maintenance", 0.000, 0.267,-0.657, 1.301, 0.0275,0.897,0.200),
c("GO:0052652","cyclic purine nucleotide metabolic process", 0.094, 4.152, 4.182, 4.082, 0.0457,0.804,0.234),
c("GO:0006390","transcription from mitochondrial promoter", 0.005, 0.802, 1.042, 2.794, 0.0484,0.915,0.235),
c("GO:0006600","creatine metabolic process", 0.002, 5.427, 2.495, 2.328, 0.0374,0.857,0.244),
c("GO:0042127","regulation of cell proliferation", 0.313,-5.699, 3.430, 4.603, 0.0229,0.785,0.259),
c("GO:0016540","protein autoprocessing", 0.011,-0.432, 1.323, 3.147, 0.0191,0.924,0.274),
c("GO:0006476","protein deacetylation", 0.072,-0.361, 1.247, 3.967, 0.0455,0.913,0.278),
c("GO:0055114","oxidation-reduction process",15.060, 4.858, 2.263, 6.286, 0.0206,0.872,0.300),
c("GO:0015886","heme transport", 0.066,-0.143,-4.692, 3.927, 0.0044,0.860,0.302),
c("GO:0006461","protein complex assembly", 0.960, 0.086,-0.705, 5.091, 0.0285,0.920,0.312),
c("GO:0060052","neurofilament cytoskeleton organization", 0.002, 1.098,-0.364, 2.501, 0.0374,0.883,0.319),
c("GO:0010918","positive regulation of mitochondrial membrane potential", 0.001,-2.305, 1.655, 2.164, 0.0044,0.851,0.327),
c("GO:0033572","transferrin transport", 0.002,-0.385,-3.961, 2.449, 0.0180,0.915,0.333),
c("GO:0006524","alanine catabolic process", 0.025, 5.584, 3.621, 3.509, 0.0009,0.800,0.338),
c("GO:0097267","omega-hydroxylase P450 pathway", 0.000, 4.727, 2.654, 1.000, 0.0017,0.851,0.339),
c("GO:0019627","urea metabolic process", 0.051, 4.765, 2.756, 3.820, 0.0455,0.846,0.339),
c("GO:1903830","magnesium ion transmembrane transport", 0.088, 0.021,-4.086, 4.053, 0.0339,0.938,0.340),
c("GO:1904179","positive regulation of adipose tissue development", 0.001,-5.562,-3.251, 1.996, 0.0009,0.657,0.344),
c("GO:0007584","response to nutrient", 0.026,-3.941, 1.599, 3.530, 0.0010,0.865,0.345),
c("GO:0090400","stress-induced premature senescence", 0.002,-4.185,-2.548, 2.350, 0.0484,0.752,0.346),
c("GO:0042908","xenobiotic transport", 0.028,-0.145,-4.010, 3.556, 0.0064,0.876,0.348),
c("GO:0060021","palate development", 0.018,-0.008,-5.732, 3.363, 0.0445,0.883,0.355),
c("GO:0033860","regulation of NAD(P)H oxidase activity", 0.001,-3.702, 4.403, 2.057, 0.0275,0.831,0.356),
c("GO:0045725","positive regulation of glycogen biosynthetic process", 0.003,-0.905, 5.340, 2.547, 0.0039,0.686,0.357),
c("GO:0001907","killing by symbiont of host cells", 0.008,-3.276, 2.257, 3.001, 0.0009,0.818,0.362),
c("GO:0016310","phosphorylation", 7.764, 2.492, 2.058, 5.998, 0.0064,0.912,0.371),
c("GO:0050892","intestinal absorption", 0.006,-0.941,-7.157, 2.913, 0.0016,0.782,0.379),
c("GO:1900107","regulation of nodal signaling pathway", 0.002,-6.525, 3.443, 2.316, 0.0275,0.740,0.380),
c("GO:0008090","retrograde axonal transport", 0.003, 0.413,-4.290, 2.521, 0.0374,0.827,0.390),
c("GO:0008203","cholesterol metabolic process", 0.028, 5.004, 2.844, 3.554, 0.0004,0.835,0.402),
c("GO:0070301","cellular response to hydrogen peroxide", 0.019,-5.078, 2.109, 3.397, 0.0023,0.823,0.402),
c("GO:0007628","adult walking behavior", 0.008,-0.864,-5.875, 3.000, 0.0288,0.814,0.411),
c("GO:0043433","negative regulation of sequence-specific DNA binding transcription factor activity", 0.031,-3.638, 4.535, 3.600, 0.0023,0.715,0.412),
c("GO:0051340","regulation of ligase activity", 0.002,-2.775, 3.103, 2.391, 0.0455,0.849,0.421),
c("GO:0097190","apoptotic signaling pathway", 0.117,-4.388, 4.477, 4.177, 0.0357,0.702,0.427),
c("GO:0006869","lipid transport", 0.270,-0.359,-4.931, 4.539, 0.0084,0.825,0.430),
c("GO:0045717","negative regulation of fatty acid biosynthetic process", 0.007, 0.731, 5.462, 2.935, 0.0030,0.667,0.434),
c("GO:0051004","regulation of lipoprotein lipase activity", 0.003,-2.788, 2.773, 2.587, 0.0089,0.840,0.435),
c("GO:0006536","glutamate metabolic process", 0.201, 5.539, 3.254, 4.412, 0.0020,0.816,0.441),
c("GO:0018279","protein N-linked glycosylation via asparagine", 0.015, 1.838, 3.560, 3.284, 0.0410,0.833,0.446),
c("GO:0046185","aldehyde catabolic process", 0.048, 4.214, 2.777, 3.790, 0.0064,0.832,0.451),
c("GO:0032071","regulation of endodeoxyribonuclease activity", 0.003,-3.421, 4.017, 2.537, 0.0374,0.791,0.451),
c("GO:0008202","steroid metabolic process", 0.161, 3.336, 1.762, 4.315, 0.0005,0.851,0.451),
c("GO:0071156","regulation of cell cycle arrest", 0.013,-2.547, 3.556, 3.230, 0.0275,0.745,0.452),
c("GO:0071372","cellular response to follicle-stimulating hormone stimulus", 0.001,-4.403, 1.848, 2.220, 0.0118,0.859,0.460),
c("GO:0042940","D-amino acid transport", 0.003,-0.063,-4.288, 2.562, 0.0275,0.873,0.471),
c("GO:1903427","negative regulation of reactive oxygen species biosynthetic process", 0.002,-3.996, 3.252, 2.486, 0.0044,0.777,0.477),
c("GO:0042493","response to drug", 0.266,-5.108, 2.172, 4.534, 0.0449,0.863,0.477),
c("GO:0001889","liver development", 0.023,-1.101,-6.479, 3.471, 0.0009,0.766,0.478),
c("GO:0006545","glycine biosynthetic process", 0.079, 5.573, 3.375, 4.005, 0.0275,0.814,0.479),
c("GO:0019216","regulation of lipid metabolic process", 0.095,-2.218, 5.684, 4.086, 0.0052,0.745,0.482),
c("GO:0006591","ornithine metabolic process", 0.088, 5.478, 3.118, 4.052, 0.0484,0.825,0.483),
c("GO:0046838","phosphorylated carbohydrate dephosphorylation", 0.064, 1.908, 2.032, 3.915, 0.0455,0.926,0.484),
c("GO:0006739","NADP metabolic process", 0.357, 4.701, 4.332, 4.661, 0.0063,0.793,0.485),
c("GO:0006520","cellular amino acid metabolic process", 5.591, 5.069, 3.403, 5.856, 0.0148,0.779,0.487),
c("GO:2000343","positive regulation of chemokine (C-X-C motif) ligand 2 production", 0.001,-5.534,-2.610, 1.959, 0.0275,0.680,0.489),
c("GO:0021814","cell motility involved in cerebral cortex radial glia guided migration", 0.001,-0.848,-6.605, 2.238, 0.0189,0.729,0.492),
c("GO:0022900","electron transport chain", 0.564, 3.324, 2.185, 4.860, 0.0186,0.848,0.493),
c("GO:0090277","positive regulation of peptide hormone secretion", 0.013,-4.678, 0.230, 3.226, 0.0004,0.601,0.496),
c("GO:0015942","formate metabolic process", 0.065, 5.321, 3.235, 3.921, 0.0017,0.832,0.499),
c("GO:0046329","negative regulation of JNK cascade", 0.007,-4.599, 5.445, 2.940, 0.0063,0.605,0.500),
c("GO:0001503","ossification", 0.074,-0.710,-6.573, 3.979, 0.0413,0.791,0.504),
c("GO:0006898","receptor-mediated endocytosis", 0.095, 0.278,-4.169, 4.086, 0.0004,0.935,0.511),
c("GO:0090239","regulation of histone H4 acetylation", 0.003,-3.175, 3.088, 2.531, 0.0455,0.769,0.511),
c("GO:0060059","embryonic retina morphogenesis in camera-type eye", 0.003,-0.909,-6.515, 2.573, 0.0029,0.790,0.512),
c("GO:1902459","positive regulation of stem cell population maintenance", 0.001,-4.906,-2.513, 2.236, 0.0275,0.663,0.517),
c("GO:1902959","regulation of aspartic-type endopeptidase activity involved in amyloid precursor protein catabolic process", 0.000,-2.854, 3.852, 1.756, 0.0455,0.802,0.517),
c("GO:0031100","animal organ regeneration", 0.005,-1.364,-6.064, 2.792, 0.0402,0.788,0.526),
c("GO:0061620","glycolytic process through glucose-6-phosphate", 0.002, 5.004, 4.281, 2.290, 0.0455,0.798,0.529),
c("GO:0002679","respiratory burst involved in defense response", 0.001,-3.703, 1.450, 2.258, 0.0374,0.816,0.535),
c("GO:0034504","protein localization to nucleus", 0.129,-0.313,-4.647, 4.218, 0.0210,0.899,0.539),
c("GO:0014911","positive regulation of smooth muscle cell migration", 0.003,-5.348,-0.209, 2.620, 0.0461,0.695,0.540),
c("GO:0061138","morphogenesis of a branching epithelium", 0.042,-1.117,-6.445, 3.730, 0.0021,0.775,0.541),
c("GO:0010793","regulation of mRNA export from nucleus", 0.003,-3.669,-1.160, 2.625, 0.0275,0.749,0.541),
c("GO:0072049","comma-shaped body morphogenesis", 0.001,-1.363,-5.960, 2.173, 0.0189,0.801,0.543),
c("GO:0046498","S-adenosylhomocysteine metabolic process", 0.066, 5.725, 3.823, 3.925, 0.0275,0.808,0.544),
c("GO:0045620","negative regulation of lymphocyte differentiation", 0.008,-3.807,-2.279, 3.018, 0.0029,0.590,0.549),
c("GO:0019626","short-chain fatty acid catabolic process", 0.024, 5.002, 3.731, 3.490, 0.0189,0.779,0.556),
c("GO:0034356","NAD biosynthesis via nicotinamide riboside salvage pathway", 0.004, 4.983, 4.281, 2.691, 0.0235,0.828,0.560),
c("GO:0046620","regulation of organ growth", 0.020,-4.335,-3.406, 3.403, 0.0044,0.688,0.561),
c("GO:0006778","porphyrin-containing compound metabolic process", 0.457, 5.545, 2.444, 4.768, 0.0453,0.896,0.561),
c("GO:0010269","response to selenium ion", 0.001,-3.417, 1.446, 1.991, 0.0189,0.892,0.564),
c("GO:0090335","regulation of brown fat cell differentiation", 0.003,-3.134,-2.585, 2.531, 0.0274,0.735,0.566),
c("GO:0000098","sulfur amino acid catabolic process", 0.008, 5.398, 3.462, 3.030, 0.0064,0.815,0.566),
c("GO:1905668","positive regulation of protein localization to endosome", 0.001,-4.713,-0.861, 2.079, 0.0275,0.732,0.569),
c("GO:0031016","pancreas development", 0.021,-1.275,-6.229, 3.438, 0.0453,0.774,0.573),
c("GO:0071397","cellular response to cholesterol", 0.001,-4.311, 1.853, 2.272, 0.0191,0.857,0.574),
c("GO:0060712","spongiotrophoblast layer development", 0.003,-1.057,-6.380, 2.533, 0.0191,0.790,0.578),
c("GO:0031659","positive regulation of cyclin-dependent protein serine/threonine kinase activity involved in G1/S transition of mitotic cell cycle", 0.002,-3.538, 4.926, 2.350, 0.0275,0.670,0.578),
c("GO:1903896","positive regulation of IRE1-mediated unfolded protein response", 0.001,-6.708, 3.143, 1.857, 0.0275,0.671,0.580),
c("GO:0071560","cellular response to transforming growth factor beta stimulus", 0.050,-4.466, 1.856, 3.804, 0.0127,0.832,0.582),
c("GO:0035733","hepatic stellate cell activation", 0.000, 0.492,-0.383, 1.748, 0.0455,0.898,0.584),
c("GO:0034763","negative regulation of transmembrane transport", 0.014,-4.429,-0.887, 3.251, 0.0274,0.743,0.585),
c("GO:0042167","heme catabolic process", 0.002, 5.014, 2.712, 2.408, 0.0374,0.834,0.586),
c("GO:0006853","carnitine shuttle", 0.000,-0.241,-4.610, 1.114, 0.0089,0.860,0.587),
c("GO:0007411","axon guidance", 0.066,-3.249,-3.116, 3.930, 0.0223,0.617,0.587),
c("GO:0048013","ephrin receptor signaling pathway", 0.016,-6.783, 3.560, 3.308, 0.0309,0.752,0.593),
c("GO:0034340","response to type I interferon", 0.008,-5.227, 1.865, 3.007, 0.0117,0.821,0.598),
c("GO:0031325","positive regulation of cellular metabolic process", 1.004,-6.818, 4.034, 5.110, 0.0233,0.668,0.602),
c("GO:0042177","negative regulation of protein catabolic process", 0.025,-2.311, 5.307, 3.513, 0.0177,0.698,0.606),
c("GO:1900016","negative regulation of cytokine production involved in inflammatory response", 0.002,-5.942,-1.181, 2.391, 0.0191,0.603,0.606),
c("GO:2000463","positive regulation of excitatory postsynaptic potential", 0.003,-6.002, 3.548, 2.606, 0.0220,0.638,0.607),
c("GO:0098902","regulation of membrane depolarization during action potential", 0.001,-3.098, 2.354, 2.188, 0.0455,0.827,0.608),
c("GO:1902237","positive regulation of endoplasmic reticulum stress-induced intrinsic apoptotic signaling pathway", 0.002,-6.892, 3.889, 2.324, 0.0089,0.647,0.617),
c("GO:0008635","activation of cysteine-type endopeptidase activity involved in apoptotic process by cytochrome c", 0.001,-5.340, 4.602, 2.279, 0.0484,0.665,0.619),
c("GO:2000271","positive regulation of fibroblast apoptotic process", 0.002,-6.128, 4.110, 2.320, 0.0374,0.726,0.621),
c("GO:0050730","regulation of peptidyl-tyrosine phosphorylation", 0.045,-2.927, 5.206, 3.759, 0.0115,0.746,0.627),
c("GO:0014068","positive regulation of phosphatidylinositol 3-kinase signaling", 0.013,-6.835, 3.378, 3.216, 0.0477,0.656,0.627),
c("GO:0032148","activation of protein kinase B activity", 0.004,-3.889, 4.818, 2.738, 0.0411,0.689,0.628),
c("GO:0000122","negative regulation of transcription from RNA polymerase II promoter", 0.199,-3.286, 4.570, 4.408, 0.0343,0.704,0.628),
c("GO:2000659","regulation of interleukin-1-mediated signaling pathway", 0.001,-6.610, 3.398, 2.117, 0.0275,0.727,0.629),
c("GO:0043392","negative regulation of DNA binding", 0.014,-3.451, 2.990, 3.256, 0.0324,0.830,0.631),
c("GO:0042426","choline catabolic process", 0.001, 4.091, 2.217, 2.230, 0.0275,0.898,0.632),
c("GO:1900119","positive regulation of execution phase of apoptosis", 0.003,-5.254, 3.939, 2.528, 0.0152,0.720,0.635),
c("GO:0046487","glyoxylate metabolic process", 0.049, 5.512, 3.224, 3.794, 0.0484,0.832,0.635),
c("GO:0030917","midbrain-hindbrain boundary development", 0.003,-0.916,-6.603, 2.576, 0.0064,0.776,0.636),
c("GO:0031663","lipopolysaccharide-mediated signaling pathway", 0.010,-6.114, 3.001, 3.128, 0.0363,0.713,0.638),
c("GO:1901679","nucleotide transmembrane transport", 0.011,-0.193,-4.465, 3.131, 0.0374,0.862,0.638),
c("GO:0032933","SREBP signaling pathway", 0.008,-6.008, 4.192, 2.989, 0.0484,0.656,0.640),
c("GO:2000646","positive regulation of receptor catabolic process", 0.001,-2.717, 5.596, 1.881, 0.0189,0.731,0.642),
c("GO:0071276","cellular response to cadmium ion", 0.009,-4.925, 2.123, 3.077, 0.0040,0.848,0.644),
c("GO:0000060","protein import into nucleus, translocation", 0.012,-0.358,-4.229, 3.175, 0.0286,0.853,0.645),
c("GO:0006548","histidine catabolic process", 0.052, 5.484, 3.853, 3.828, 0.0044,0.789,0.649),
c("GO:0034638","phosphatidylcholine catabolic process", 0.002, 4.403, 3.693, 2.328, 0.0374,0.793,0.652),
c("GO:1901523","icosanoid catabolic process", 0.000, 5.088, 3.154, 1.732, 0.0189,0.841,0.653),
c("GO:0006559","L-phenylalanine catabolic process", 0.031, 5.469, 3.690, 3.596, 0.0118,0.794,0.657),
c("GO:0051642","centrosome localization", 0.007,-0.128,-3.483, 2.973, 0.0190,0.928,0.658),
c("GO:0010040","response to iron(II) ion", 0.002,-4.034, 1.687, 2.407, 0.0189,0.886,0.658),
c("GO:0000042","protein targeting to Golgi", 0.021,-0.296,-4.424, 3.422, 0.0374,0.900,0.670),
c("GO:0090201","negative regulation of release of cytochrome c from mitochondria", 0.003,-5.536, 3.408, 2.591, 0.0285,0.651,0.672),
c("GO:0006855","drug transmembrane transport", 0.189,-4.897,-2.034, 4.384, 0.0138,0.752,0.672),
c("GO:0009887","animal organ morphogenesis", 0.264,-1.342,-6.260, 4.530, 0.0333,0.742,0.673),
c("GO:0006879","cellular iron ion homeostasis", 0.110,-1.935, 4.780, 4.148, 0.0002,0.750,0.674),
c("GO:0030178","negative regulation of Wnt signaling pathway", 0.037,-5.011, 4.290, 3.681, 0.0367,0.636,0.676),
c("GO:0007262","STAT protein import into nucleus", 0.001,-5.800,-0.845, 2.223, 0.0484,0.729,0.677),
c("GO:0045739","positive regulation of DNA repair", 0.013,-5.991, 3.701, 3.205, 0.0123,0.635,0.678),
c("GO:0034142","toll-like receptor 4 signaling pathway", 0.006,-6.777, 3.183, 2.892, 0.0191,0.642,0.679),
c("GO:0046326","positive regulation of glucose import", 0.007,-6.120,-0.943, 2.979, 0.0287,0.694,0.680),
c("GO:0042773","ATP synthesis coupled electron transport", 0.221, 4.104, 4.074, 4.452, 0.0456,0.794,0.685),
c("GO:0010886","positive regulation of cholesterol storage", 0.002,-5.084,-0.414, 2.320, 0.0374,0.711,0.685),
c("GO:0001568","blood vessel development", 0.136,-1.287,-6.290, 4.243, 0.0452,0.758,0.689),
c("GO:0006505","GPI anchor metabolic process", 0.102, 3.442, 3.385, 4.115, 0.0456,0.805,0.690),
c("GO:0046439","L-cysteine metabolic process", 0.039, 6.129, 3.195, 3.699, 0.0374,0.827,0.692),
c("GO:0051897","positive regulation of protein kinase B signaling", 0.016,-6.848, 3.394, 3.304, 0.0485,0.653,0.692),
c("GO:0032287","peripheral nervous system myelin maintenance", 0.002,-0.055,-6.738, 2.468, 0.0374,0.750,0.693),
c("GO:1904468","negative regulation of tumor necrosis factor secretion", 0.001,-3.544,-4.697, 1.982, 0.0374,0.625,0.695),
c("GO:1903874","ferrous iron transmembrane transport", 0.028, 0.089,-4.022, 3.563, 0.0374,0.931,0.700));



# Turn the liver REVIGO matrix into the data frame used for plotting.
# revigo.data is built with rbind() on vectors that mix strings and numbers,
# so every cell arrives as text and the numeric columns must be converted.
one.data <- data.frame(revigo.data)
names(one.data) <- revigo.names

# Keep only terms that have semantic-space coordinates ("null" marks none).
one.data <- one.data[one.data$plot_X != "null" & one.data$plot_Y != "null", ]

# as.character() comes first on purpose: on R < 4.0 data.frame() stores
# strings as factors, and as.numeric() on a factor returns the level codes
# rather than the printed numbers.
one.data$plot_X <- as.numeric(as.character(one.data$plot_X))
one.data$plot_Y <- as.numeric(as.character(one.data$plot_Y))
one.data$plot_size <- as.numeric(as.character(one.data$plot_size))

# Numeric copy of `value`. No log transform is applied here, despite the name.
one.data$log10_p_value <- as.numeric(as.character(one.data$value))

# The source column is called `frequency_%`. Look it up by its exact name
# instead of relying on `$` partial matching (`one.data$frequency`), which
# silently returns NULL on tibbles and warns under warnPartialMatchDollar.
one.data$frequency <- as.numeric(as.character(one.data[["frequency_%"]]))

one.data$uniqueness <- as.numeric(as.character(one.data$uniqueness))
one.data$dispensability <- as.numeric(as.character(one.data$dispensability))
# --------------------------------------------------------------------------


# Manually curated table of the terms whose labels should be displayed. Every term in the data is plotted as a point; only the rows listed here also get a text label.


# Hand-picked liver REVIGO rows whose descriptions are drawn as labels.
# Field order matches the column names assigned to `ex` below. Because each
# vector mixes strings and numbers, c() stores every field as text.
label1 <- c("GO:0042632", "cholesterol homeostasis",
            0.019, -3.975, 2.639, 3.381, 0.0000, 0.818, 0.000)
label2 <- c("GO:0017144", "drug metabolic process",
            0.058, 0.564, -0.386, 3.868, 0.0392, 0.959, 0.035)
label3 <- c("GO:0006629", "lipid metabolic process",
            3.522, 3.786, 1.634, 5.655, 0.0144, 0.876, 0.126)

# Stack the selected rows, then name the columns like the main REVIGO table.
ex <- rbind(label1, label2, label3)
ex <- as.data.frame(ex, stringsAsFactors = FALSE)
names(ex) <- c(
  "term_ID", "description", "frequency_%",
  "plot_X", "plot_Y", "plot_size",
  "value", "uniqueness", "dispensability"
)

# Scatter plot of the liver REVIGO terms in semantic space.
# Each term in one.data is one filled circle (shape 21): fill follows
# log10_p_value and point size follows plot_size. Only the rows of `ex`
# receive a text label.
# NOTE(review): the size legend is titled "No. of genes" but is driven by
# plot_size; confirm the title matches what plot_size encodes.
ggplot(data = one.data) +
  geom_point(
    aes(plot_X, plot_Y, fill = log10_p_value, size = plot_size),
    shape = 21, color = "black", stroke = 0.7, alpha = 0.8
  ) +
  scale_fill_gradient2(
    low = muted("red"), mid = "white", high = muted("blue")
  ) +
  # Both legends are declared once here; the size guide used to be set twice.
  guides(
    fill = guide_legend(title = "P value"),
    size = guide_legend(title = "No. of genes")
  ) +
  # scale_size(range = c(5, 20)) +
  # plot_X is mapped to the x axis and plot_Y to the y axis, so the axis
  # titles must follow the same order (they were previously swapped).
  labs(x = "Semantic space x", y = "Semantic space y") +
  theme_bw() +
  geom_label_repel(
    data = ex,
    # Columns are referenced by bare name so they are evaluated inside `ex`;
    # `ex$...` inside aes() bypasses the layer data. The columns of `ex` are
    # stored as text, hence the as.numeric() conversion.
    mapping = aes(as.numeric(plot_X), as.numeric(plot_Y), label = description),
    fontface = "bold", color = "black",
    box.padding = 0.6, point.padding = 1.6,
    size = 3,
    # Width of the line segments.
    segment.size = 0,
    # Strength of the repulsion force.
    force = 1
  )