The goal of this script is to compare the conserved tissue specific data with the tissue’s ranking in the GTEx data v6 data.
# Import GTEx data
gtex_data <- read.csv("../data/GTEx_genes_tissues.csv", stringsAsFactors = FALSE)
# Find which genes are in both datasets
cpm_12184 <- read.delim("../../../Reg_Evo_Primates/data/cpm_12184.txt")
# Tissue up/downreg
heart_upreg_data <- read.csv("../data/specific_upreg_heart_genes.csv", stringsAsFactors = FALSE)
heart_downreg_data <- read.csv("../data/specific_downreg_heart_genes.csv", stringsAsFactors = FALSE)
kidney_upreg_data <- read.csv("../data/specific_upreg_kidney_genes.csv", stringsAsFactors = FALSE)
kidney_downreg_data <- read.csv("../data/specific_downreg_kidney_genes.csv", stringsAsFactors = FALSE)
liver_upreg_data <- read.csv("../data/specific_upreg_liver_genes.csv", stringsAsFactors = FALSE)
liver_downreg_data <- read.csv("../data/specific_downreg_liver_genes.csv", stringsAsFactors = FALSE)
lung_upreg_data <- read.csv("../data/specific_upreg_lung_genes.csv", stringsAsFactors = FALSE)
lung_downreg_data <- read.csv("../data/specific_downreg_lung_genes.csv", stringsAsFactors = FALSE)
summary(rownames(cpm_12184) %in% gtex_data$ENSG)
## Mode FALSE TRUE
## logical 407 11777
11,777 genes are in both datasets.
# Subset
summary(heart_upreg_data$x %in% gtex_data$ENSG)
## Mode FALSE TRUE
## logical 14 354
inshared_lists = gtex_data$ENSG %in% heart_upreg_data$x
inshared_lists_data <- as.data.frame(inshared_lists)
counts_genes_in <- cbind(gtex_data, inshared_lists_data)
counts_genes_in_cutoff_pre <- subset(counts_genes_in, inshared_lists_data == "TRUE")
counts_genes_in_cutoff_pre <- counts_genes_in_cutoff_pre[,1:7]
# Is the heart the highest?
upreg_heart_ranking <- data.frame(counts_genes_in_cutoff_pre[,4:7], t(apply(-counts_genes_in_cutoff_pre[,4:7], 1, rank, ties.method='min')))
upreg_heart_ranking[,5]
## [1] 1 1 2 1 2 1 1 1 1 1 1 1 4 1 4 1 1 1 2 1 1 1 1 1 1 1 1 1 1 3 1 2 1 1 1
## [36] 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 2 1 1 1 1 2 2 3 1 1 1 1 1 1 1 1 1 2 1
## [71] 1 2 1 1 1 1 1 1 4 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [106] 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1 1 1 1 1 1 1 1 1 1 2 1
## [141] 2 1 1 1 1 2 1 2 2 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1 1 1 1 1 2
## [176] 1 1 1 2 3 1 3 1 1 1 1 1 1 3 1 2 4 1 1 1 1 1 1 3 1 1 2 1 1 1 1 1 1 1 1
## [211] 2 1 1 1 1 3 2 2 1 1 1 1 2 1 1 2 1 1 1 1 1 1 1 1 1 2 1 1 1 2 1 1 1 1 1
## [246] 1 1 1 1 1 2 1 1 2 1 1 1 1 1 2 1 1 1 1 2 1 1 1 1 3 1 1 3 1 2 3 3 1 1 2
## [281] 1 1 1 1 2 1 1 1 2 1 1 1 1 1 1 1 1 1 2 1 1 1 2 1 1 2 1 1 2 1 3 2 1 1 1
## [316] 1 1 1 1 2 1 1 1 1 1 1 3 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 2 1 1 1 1 1 1 1
## [351] 1 1 1 1
# Subset
summary(heart_downreg_data$x %in% gtex_data$ENSG)
## Mode FALSE TRUE
## logical 2 112
inshared_lists = gtex_data$ENSG %in% heart_downreg_data$x
inshared_lists_data <- as.data.frame(inshared_lists)
counts_genes_in <- cbind(gtex_data, inshared_lists_data)
counts_genes_in_cutoff_pre <- subset(counts_genes_in, inshared_lists_data == "TRUE")
counts_genes_in_cutoff_pre <- counts_genes_in_cutoff_pre[,1:7]
# Is the heart the lowest?
downreg_heart_ranking <- data.frame(counts_genes_in_cutoff_pre[,4:7], t(apply(-counts_genes_in_cutoff_pre[,4:7], 1, rank, ties.method='min')))
downreg_heart_ranking[,5]
## [1] 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [36] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [71] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [106] 4 4 4 4 4 4 4
ranking_in_gtex <- function(tissue_name){
# Subset
summary(tissue_name %in% gtex_data$ENSG)
inshared_lists = gtex_data$ENSG %in% tissue_name
inshared_lists_data <- as.data.frame(inshared_lists)
counts_genes_in <- cbind(gtex_data, inshared_lists_data)
counts_genes_in_cutoff_pre <- subset(counts_genes_in, inshared_lists_data == "TRUE")
counts_genes_in_cutoff_pre <- counts_genes_in_cutoff_pre[,1:7]
# Is the heart the highest?
upreg_heart_ranking <- data.frame(counts_genes_in_cutoff_pre[,4:7], t(apply(-counts_genes_in_cutoff_pre[,4:7], 1, rank, ties.method='min')))
return(upreg_heart_ranking)
}
upreg_heart_ranking <- ranking_in_gtex(heart_upreg_data$x)[,5]
downreg_heart_ranking <- ranking_in_gtex(heart_downreg_data$x)[,5]
upreg_kidney_ranking <- ranking_in_gtex(kidney_upreg_data$x)[,6]
downreg_kidney_ranking <- ranking_in_gtex(kidney_downreg_data$x)[,6]
upreg_liver_ranking <- ranking_in_gtex(liver_upreg_data$x)[,7]
downreg_liver_ranking <- ranking_in_gtex(liver_downreg_data$x)[,7]
upreg_lung_ranking <- ranking_in_gtex(lung_upreg_data$x)[,8]
downreg_lung_ranking <- ranking_in_gtex(lung_downreg_data$x)[,8]
# Heart
upreg_heart_num <- array(1, dim=c(length(upreg_heart_ranking), 1))
heart_cor1 <- cbind(upreg_heart_num, upreg_heart_ranking)
downreg_heart_num <- array(4, dim=c(length(downreg_heart_ranking), 1))
heart_cor2 <- cbind(downreg_heart_num, downreg_heart_ranking)
# Kidney
upreg_kidney_num <- array(1, dim=c(length(upreg_kidney_ranking), 1))
kidney_cor1 <- cbind(upreg_kidney_num, upreg_kidney_ranking)
downreg_kidney_num <- array(4, dim=c(length(downreg_kidney_ranking), 1))
kidney_cor2 <- cbind(downreg_kidney_num, downreg_kidney_ranking)
# Liver
upreg_liver_num <- array(1, dim=c(length(upreg_liver_ranking), 1))
liver_cor1 <- cbind(upreg_liver_num, upreg_liver_ranking)
downreg_liver_num <- array(4, dim=c(length(downreg_liver_ranking), 1))
liver_cor2 <- cbind(downreg_liver_num, downreg_liver_ranking)
# Lung
upreg_lung_num <- array(1, dim=c(length(upreg_lung_ranking), 1))
lung_cor1 <- cbind(upreg_lung_num, upreg_lung_ranking)
downreg_lung_num <- array(4, dim=c(length(downreg_lung_ranking), 1))
lung_cor2 <- cbind(downreg_lung_num, downreg_lung_ranking)
tissue_cor <- rbind(heart_cor1, heart_cor2, kidney_cor1, kidney_cor2, liver_cor1, liver_cor2, lung_cor1, lung_cor2)
# Get the correlation between tissues
cor(tissue_cor[,1], tissue_cor[,2])
## [1] 0.9143294
# How many times does the ranking in our data match the ranking in GTEx?
summary(tissue_cor[,1] == tissue_cor[,2])
## Mode FALSE TRUE
## logical 199 1540
1540/(199+1540)
## [1] 0.8855664
# Load libraries
library("topGO")
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, cbind, colMeans,
## colnames, colSums, do.call, duplicated, eval, evalq, Filter,
## Find, get, grep, grepl, intersect, is.unsorted, lapply,
## lengths, Map, mapply, match, mget, order, paste, pmax,
## pmax.int, pmin, pmin.int, Position, rank, rbind, Reduce,
## rowMeans, rownames, rowSums, sapply, setdiff, sort, table,
## tapply, union, unique, unsplit, which, which.max, which.min
## Loading required package: graph
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: GO.db
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: IRanges
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
##
## expand.grid
##
## Loading required package: SparseM
##
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
##
## backsolve
##
## groupGOTerms: GOBPTerm, GOMFTerm, GOCCTerm environments built.
##
## Attaching package: 'topGO'
## The following object is masked from 'package:IRanges':
##
## members
#library("biomaRt")
library("clusterProfiler")
## Loading required package: DOSE
## DOSE v3.4.0 For help: https://guangchuangyu.github.io/DOSE
##
## If you use DOSE in published research, please cite:
## Guangchuang Yu, Li-Gen Wang, Guang-Rong Yan, Qing-Yu He. DOSE: an R/Bioconductor package for Disease Ontology Semantic and Enrichment analysis. Bioinformatics 2015, 31(4):608-609
## clusterProfiler v3.6.0 For help: https://guangchuangyu.github.io/clusterProfiler
##
## If you use clusterProfiler in published research, please cite:
## Guangchuang Yu., Li-Gen Wang, Yanyan Han, Qing-Yu He. clusterProfiler: an R package for comparing biological themes among gene clusters. OMICS: A Journal of Integrative Biology. 2012, 16(5):284-287.
library("org.Hs.eg.db")
##
# Combine the upregulated and downregulated tissues
combine_hearts <- rbind(heart_upreg_data, heart_downreg_data)
combine_kidneys <- rbind(kidney_upreg_data, kidney_downreg_data)
combine_livers <- rbind(liver_upreg_data, liver_downreg_data)
combine_lungs <- rbind(lung_upreg_data, lung_downreg_data)
# Matrix for which are in the test set
true_false <- rownames(cpm_12184) %in% combine_hearts$x
true_false <- as.numeric(true_false)
# Merge ENSG with true/false
test_gene <- as.vector(true_false)
names(test_gene) <- rownames(cpm_12184)
# Run topGO
go_data <- new("topGOdata",
ontology = "BP",
allGenes = test_gene,
geneSel = function(allScore){
return(allScore > 0.01)
},
nodeSize = 5,
annotationFun = annFUN.org,
mapping = "org.Hs.eg.db",
ID = "ensembl")
##
## Building most specific GOs .....
## ( 10269 GO terms found. )
##
## Build GO DAG topology ..........
## ( 14352 GO terms and 33417 relations. )
##
## Annotating nodes ...............
## ( 10194 genes annotated to the GO terms. )
# Perform enrichment test
go_test <- runTest(go_data, algorithm = "weight01", statistic = "fisher")
##
## -- Weight01 Algorithm --
##
## the algorithm is scoring 4126 nontrivial nodes
## parameters:
## test statistic: fisher
##
## Level 19: 1 nodes to be scored (0 eliminated genes)
##
## Level 18: 3 nodes to be scored (0 eliminated genes)
##
## Level 17: 4 nodes to be scored (17 eliminated genes)
##
## Level 16: 9 nodes to be scored (23 eliminated genes)
##
## Level 15: 28 nodes to be scored (39 eliminated genes)
##
## Level 14: 60 nodes to be scored (142 eliminated genes)
##
## Level 13: 118 nodes to be scored (532 eliminated genes)
##
## Level 12: 186 nodes to be scored (1207 eliminated genes)
##
## Level 11: 293 nodes to be scored (2865 eliminated genes)
##
## Level 10: 431 nodes to be scored (3997 eliminated genes)
##
## Level 9: 540 nodes to be scored (5462 eliminated genes)
##
## Level 8: 565 nodes to be scored (6990 eliminated genes)
##
## Level 7: 609 nodes to be scored (8125 eliminated genes)
##
## Level 6: 558 nodes to be scored (8955 eliminated genes)
##
## Level 5: 383 nodes to be scored (9425 eliminated genes)
##
## Level 4: 217 nodes to be scored (9726 eliminated genes)
##
## Level 3: 99 nodes to be scored (9895 eliminated genes)
##
## Level 2: 21 nodes to be scored (9976 eliminated genes)
##
## Level 1: 1 nodes to be scored (10069 eliminated genes)
go_table <- GenTable(go_data, weightFisher = go_test,
orderBy = "weightFisher", ranksOf = "weightFisher",
topNodes = sum(score(go_test) < .01))
go_table
## GO.ID Term Annotated
## 1 GO:0030049 muscle filament sliding 23
## 2 GO:0045214 sarcomere organization 27
## 3 GO:0060048 cardiac muscle contraction 93
## 4 GO:0055010 ventricular cardiac muscle tissue morpho... 36
## 5 GO:0002026 regulation of the force of heart contrac... 23
## 6 GO:0071688 striated muscle myosin thick filament as... 8
## 7 GO:0010881 regulation of cardiac muscle contraction... 17
## 8 GO:0002027 regulation of heart rate 59
## 9 GO:0086064 cell communication by electrical couplin... 16
## 10 GO:0055003 cardiac myofibril assembly 14
## 11 GO:1903779 regulation of cardiac conduction 46
## 12 GO:0086012 membrane depolarization during cardiac m... 12
## 13 GO:0086091 regulation of heart rate by cardiac cond... 21
## 14 GO:0030240 skeletal muscle thin filament assembly 5
## 15 GO:0035994 response to muscle stretch 16
## 16 GO:0006942 regulation of striated muscle contractio... 61
## 17 GO:0006937 regulation of muscle contraction 94
## 18 GO:0048739 cardiac muscle fiber development 7
## 19 GO:0006941 striated muscle contraction 113
## 20 GO:0010460 positive regulation of heart rate 14
## 21 GO:0007512 adult heart development 8
## 22 GO:0003009 skeletal muscle contraction 20
## 23 GO:1903115 regulation of actin filament-based movem... 30
## 24 GO:0055008 cardiac muscle tissue morphogenesis 45
## 25 GO:0014733 regulation of skeletal muscle adaptation 9
## 26 GO:0003300 cardiac muscle hypertrophy 57
## 27 GO:0071313 cellular response to caffeine 5
## 28 GO:0007519 skeletal muscle tissue development 95
## 29 GO:0032780 negative regulation of ATPase activity 11
## 30 GO:0008016 regulation of heart contraction 139
## 31 GO:0086019 cell-cell signaling involved in cardiac ... 18
## 32 GO:0055075 potassium ion homeostasis 7
## 33 GO:0032781 positive regulation of ATPase activity 44
## 34 GO:0072659 protein localization to plasma membrane 182
## 35 GO:0032386 regulation of intracellular transport 422
## 36 GO:0006936 muscle contraction 208
## 37 GO:0060314 regulation of ryanodine-sensitive calciu... 20
## 38 GO:0010765 positive regulation of sodium ion transp... 23
## 39 GO:0014037 Schwann cell differentiation 27
## 40 GO:0089718 amino acid import across plasma membrane 8
## 41 GO:0010667 negative regulation of cardiac muscle ce... 16
## 42 GO:0000165 MAPK cascade 586
## 43 GO:0051443 positive regulation of ubiquitin-protein... 85
## 44 GO:0055119 relaxation of cardiac muscle 14
## 45 GO:0071872 cellular response to epinephrine stimulu... 9
## 46 GO:0010666 positive regulation of cardiac muscle ce... 9
## 47 GO:0051639 actin filament network formation 9
## 48 GO:0086014 atrial cardiac muscle cell action potent... 12
## 49 GO:0010649 regulation of cell communication by elec... 10
## 50 GO:0014898 cardiac muscle hypertrophy in response t... 18
## 51 GO:0014912 negative regulation of smooth muscle cel... 18
## 52 GO:0045445 myoblast differentiation 58
## 53 GO:0032469 endoplasmic reticulum calcium ion homeos... 19
## 54 GO:0005513 detection of calcium ion 10
## 55 GO:0007274 neuromuscular synaptic transmission 10
## 56 GO:0043092 L-amino acid import 10
## 57 GO:0007155 cell adhesion 883
## 58 GO:0060307 regulation of ventricular cardiac muscle... 11
## Significant Expected weightFisher
## 1 16 0.92 6.2e-18
## 2 14 1.08 2.7e-13
## 3 31 3.72 2.7e-13
## 4 13 1.44 3.1e-10
## 5 11 0.92 3.2e-10
## 6 6 0.32 1.0e-07
## 7 8 0.68 1.1e-07
## 8 21 2.36 1.5e-07
## 9 7 0.64 3.0e-07
## 10 7 0.56 4.2e-07
## 11 12 1.84 5.1e-07
## 12 6 0.48 3.0e-06
## 13 7 0.84 1.1e-05
## 14 4 0.20 1.2e-05
## 15 6 0.64 2.3e-05
## 16 16 2.44 5.9e-05
## 17 21 3.76 7.0e-05
## 18 4 0.28 8.0e-05
## 19 37 4.52 0.00012
## 20 5 0.56 0.00015
## 21 4 0.32 0.00016
## 22 6 0.80 0.00021
## 23 10 1.20 0.00024
## 24 17 1.80 0.00024
## 25 4 0.36 0.00027
## 26 13 2.28 0.00057
## 27 3 0.20 0.00060
## 28 13 3.80 0.00067
## 29 4 0.44 0.00067
## 30 33 5.56 0.00077
## 31 7 0.72 0.00113
## 32 3 0.28 0.00159
## 33 7 1.76 0.00166
## 34 18 7.28 0.00199
## 35 25 16.89 0.00216
## 36 50 8.32 0.00227
## 37 6 0.80 0.00301
## 38 5 0.92 0.00304
## 39 4 1.08 0.00306
## 40 3 0.32 0.00307
## 41 4 0.64 0.00314
## 42 28 23.45 0.00377
## 43 7 3.40 0.00396
## 44 5 0.56 0.00441
## 45 3 0.36 0.00446
## 46 3 0.36 0.00446
## 47 3 0.36 0.00446
## 48 4 0.48 0.00463
## 49 3 0.40 0.00465
## 50 4 0.72 0.00495
## 51 4 0.72 0.00495
## 52 6 2.32 0.00605
## 53 4 0.76 0.00607
## 54 3 0.40 0.00619
## 55 3 0.40 0.00619
## 56 3 0.40 0.00619
## 57 37 35.34 0.00673
## 58 3 0.44 0.00826
sig.genes <- sigGenes(go_data)
# Make table
write.csv(go_table, "../data/GO_conserved_tissue_specific_heart.csv", quote = FALSE, row.names = FALSE)
goresults <- sapply(go_table$GO.ID, function(x)
{
genes<-genesInTerm(go_data, x)
genes[[1]][genes[[1]] %in% sig.genes]
})
goresults["GO:0030049"] # muscle filament sliding "ENSG00000092054" "ENSG00000111245" "ENSG00000129991" "ENSG00000134571" "ENSG00000143632" "ENSG00000159251" "ENSG00000160808" "ENSG00000197616"
## $`GO:0030049`
## [1] "ENSG00000077522" "ENSG00000092054" "ENSG00000111245"
## [4] "ENSG00000114854" "ENSG00000118194" "ENSG00000129991"
## [7] "ENSG00000134571" "ENSG00000136842" "ENSG00000140416"
## [10] "ENSG00000143632" "ENSG00000155657" "ENSG00000159251"
## [13] "ENSG00000160808" "ENSG00000173991" "ENSG00000175084"
## [16] "ENSG00000197616"
goresults["GO:0060048"] # cardiac muscle contraction "ENSG00000092054" "ENSG00000111245" "ENSG00000129170" "ENSG00000129991" "ENSG00000134571" "ENSG00000159251" "ENSG00000160808" "ENSG00000197616"
## $`GO:0060048`
## [1] "ENSG00000018625" "ENSG00000055118" "ENSG00000057294"
## [4] "ENSG00000072062" "ENSG00000092054" "ENSG00000101400"
## [7] "ENSG00000104881" "ENSG00000105711" "ENSG00000111245"
## [10] "ENSG00000114279" "ENSG00000114854" "ENSG00000116783"
## [13] "ENSG00000118194" "ENSG00000118729" "ENSG00000129170"
## [16] "ENSG00000129991" "ENSG00000130037" "ENSG00000134571"
## [19] "ENSG00000140416" "ENSG00000145349" "ENSG00000151067"
## [22] "ENSG00000155657" "ENSG00000159251" "ENSG00000160808"
## [25] "ENSG00000165995" "ENSG00000173991" "ENSG00000174437"
## [28] "ENSG00000183023" "ENSG00000197616" "ENSG00000198523"
## [31] "ENSG00000198626"
goresults["GO:0055010"] # ventricular cardiac muscle tissue morpho "ENSG00000092054" "ENSG00000111245" "ENSG00000129991" "ENSG00000134571" "ENSG00000160808" "ENSG00000197616"
## $`GO:0055010`
## [1] "ENSG00000057294" "ENSG00000092054" "ENSG00000111245"
## [4] "ENSG00000114854" "ENSG00000118194" "ENSG00000129991"
## [7] "ENSG00000130939" "ENSG00000134571" "ENSG00000135547"
## [10] "ENSG00000140416" "ENSG00000160808" "ENSG00000197616"
## [13] "ENSG00000198626"
# Matrix for which are in the test set
true_false <- rownames(cpm_12184) %in% combine_kidneys$x
true_false <- as.numeric(true_false)
# Merge ENSG with true/false
test_gene <- as.vector(true_false)
names(test_gene) <- rownames(cpm_12184)
# Run topGO
go_data <- new("topGOdata",
ontology = "BP",
allGenes = test_gene,
geneSel = function(allScore){
return(allScore > 0.01)
},
nodeSize = 5,
annotationFun = annFUN.org,
mapping = "org.Hs.eg.db",
ID = "ensembl")
##
## Building most specific GOs .....
## ( 10269 GO terms found. )
##
## Build GO DAG topology ..........
## ( 14352 GO terms and 33417 relations. )
##
## Annotating nodes ...............
## ( 10194 genes annotated to the GO terms. )
# Perform enrichment test
go_test <- runTest(go_data, algorithm = "weight01", statistic = "fisher")
##
## -- Weight01 Algorithm --
##
## the algorithm is scoring 2833 nontrivial nodes
## parameters:
## test statistic: fisher
##
## Level 17: 5 nodes to be scored (0 eliminated genes)
##
## Level 16: 6 nodes to be scored (0 eliminated genes)
##
## Level 15: 15 nodes to be scored (24 eliminated genes)
##
## Level 14: 29 nodes to be scored (58 eliminated genes)
##
## Level 13: 59 nodes to be scored (213 eliminated genes)
##
## Level 12: 108 nodes to be scored (741 eliminated genes)
##
## Level 11: 172 nodes to be scored (2170 eliminated genes)
##
## Level 10: 251 nodes to be scored (3167 eliminated genes)
##
## Level 9: 335 nodes to be scored (4879 eliminated genes)
##
## Level 8: 386 nodes to be scored (6050 eliminated genes)
##
## Level 7: 431 nodes to be scored (7085 eliminated genes)
##
## Level 6: 445 nodes to be scored (8183 eliminated genes)
##
## Level 5: 314 nodes to be scored (9272 eliminated genes)
##
## Level 4: 177 nodes to be scored (9658 eliminated genes)
##
## Level 3: 80 nodes to be scored (9870 eliminated genes)
##
## Level 2: 19 nodes to be scored (9964 eliminated genes)
##
## Level 1: 1 nodes to be scored (10057 eliminated genes)
go_table <- GenTable(go_data, weightFisher = go_test,
orderBy = "weightFisher", ranksOf = "weightFisher",
topNodes = sum(score(go_test) < .01))
go_table
## GO.ID Term Annotated
## 1 GO:0043252 sodium-independent organic anion transpo... 13
## 2 GO:0072235 metanephric distal tubule development 5
## 3 GO:0072205 metanephric collecting duct development 6
## 4 GO:2000665 regulation of interleukin-13 secretion 6
## 5 GO:0090383 phagosome acidification 17
## 6 GO:0015991 ATP hydrolysis coupled proton transport 19
## 7 GO:0006814 sodium ion transport 127
## 8 GO:0033572 transferrin transport 26
## 9 GO:0042953 lipoprotein transport 12
## 10 GO:0015698 inorganic anion transport 89
## 11 GO:1903825 organic acid transmembrane transport 70
## 12 GO:0007588 excretion 35
## 13 GO:0042985 negative regulation of amyloid precursor... 5
## 14 GO:2000662 regulation of interleukin-5 secretion 5
## 15 GO:0015838 amino-acid betaine transport 5
## 16 GO:2000147 positive regulation of cell motility 317
## 17 GO:0032927 positive regulation of activin receptor ... 6
## 18 GO:0046689 response to mercury ion 6
## 19 GO:1990573 potassium ion import across plasma membr... 6
## 20 GO:0042472 inner ear morphogenesis 42
## 21 GO:0045880 positive regulation of smoothened signal... 22
## 22 GO:0055074 calcium ion homeostasis 213
## 23 GO:0072182 regulation of nephron tubule epithelial ... 7
## 24 GO:0072079 nephron tubule formation 7
## 25 GO:0001657 ureteric bud development 61
## 26 GO:0051048 negative regulation of secretion 118
## 27 GO:0006590 thyroid hormone generation 8
## 28 GO:0071599 otic vesicle development 8
## 29 GO:0042359 vitamin D metabolic process 9
## 30 GO:0072488 ammonium transmembrane transport 9
## 31 GO:0032352 positive regulation of hormone metabolic... 9
## 32 GO:0002829 negative regulation of type 2 immune res... 9
## 33 GO:0046415 urate metabolic process 9
## 34 GO:0006020 inositol metabolic process 9
## 35 GO:0015695 organic cation transport 23
## Significant Expected weightFisher
## 1 4 0.19 2.6e-05
## 2 3 0.07 2.8e-05
## 3 3 0.09 5.6e-05
## 4 3 0.09 5.6e-05
## 5 4 0.24 8.3e-05
## 6 4 0.27 0.00013
## 7 9 1.82 0.00022
## 8 4 0.37 0.00047
## 9 3 0.17 0.00058
## 10 7 1.27 0.00070
## 11 7 1.00 0.00113
## 12 4 0.50 0.00150
## 13 2 0.07 0.00198
## 14 2 0.07 0.00198
## 15 2 0.07 0.00198
## 16 7 4.54 0.00292
## 17 2 0.09 0.00294
## 18 2 0.09 0.00294
## 19 2 0.09 0.00294
## 20 4 0.60 0.00296
## 21 3 0.32 0.00363
## 22 7 3.05 0.00397
## 23 2 0.10 0.00408
## 24 2 0.10 0.00408
## 25 5 0.87 0.00511
## 26 5 1.69 0.00529
## 27 2 0.11 0.00539
## 28 2 0.11 0.00539
## 29 2 0.13 0.00687
## 30 2 0.13 0.00687
## 31 2 0.13 0.00687
## 32 2 0.13 0.00687
## 33 2 0.13 0.00687
## 34 2 0.13 0.00687
## 35 5 0.33 0.00819
sig.genes <- sigGenes(go_data)
# Make table
write.csv(go_table, "../data/GO_conserved_tissue_specific_kidney.csv", quote = FALSE, row.names = FALSE)
goresults <- sapply(go_table$GO.ID, function(x)
{
genes<-genesInTerm(go_data, x)
genes[[1]][genes[[1]] %in% sig.genes]
})
goresults["GO:1903825"] # organic acid transmembrane transport "ENSG00000092068" "ENSG00000197891" "ENSG00000197901"
## $`GO:1903825`
## [1] "ENSG00000081479" "ENSG00000092068" "ENSG00000138079" "ENSG00000158296"
## [5] "ENSG00000167703" "ENSG00000197891" "ENSG00000197901"
goresults["GO:0072205"] # metanephric distal tubule development "ENSG00000104327" "ENSG00000167580"
## $`GO:0072205`
## [1] "ENSG00000104327" "ENSG00000125618" "ENSG00000167580"
goresults["GO:0015698"] # inorganic anion transport "ENSG00000074803" "ENSG00000197891" "ENSG00000197901"
## $`GO:0015698`
## [1] "ENSG00000074803" "ENSG00000114859" "ENSG00000126562" "ENSG00000131183"
## [5] "ENSG00000149452" "ENSG00000197891" "ENSG00000197901"
goresults["GO:0007588"] # excretion "ENSG00000167580" "ENSG00000169344"
## $`GO:0007588`
## [1] "ENSG00000116039" "ENSG00000166828" "ENSG00000167580" "ENSG00000169344"
# Matrix for which are in the test set
true_false <- rownames(cpm_12184) %in% combine_livers$x
true_false <- as.numeric(true_false)
# Merge ENSG with true/false
test_gene <- as.vector(true_false)
names(test_gene) <- rownames(cpm_12184)
# Run topGO
go_data <- new("topGOdata",
ontology = "BP",
allGenes = test_gene,
geneSel = function(allScore){
return(allScore > 0.01)
},
nodeSize = 5,
annotationFun = annFUN.org,
mapping = "org.Hs.eg.db",
ID = "ensembl")
##
## Building most specific GOs .....
## ( 10269 GO terms found. )
##
## Build GO DAG topology ..........
## ( 14352 GO terms and 33417 relations. )
##
## Annotating nodes ...............
## ( 10194 genes annotated to the GO terms. )
# Perform enrichment test
go_test <- runTest(go_data, algorithm = "weight01", statistic = "fisher")
##
## -- Weight01 Algorithm --
##
## the algorithm is scoring 5362 nontrivial nodes
## parameters:
## test statistic: fisher
##
## Level 18: 2 nodes to be scored (0 eliminated genes)
##
## Level 17: 6 nodes to be scored (0 eliminated genes)
##
## Level 16: 17 nodes to be scored (22 eliminated genes)
##
## Level 15: 40 nodes to be scored (51 eliminated genes)
##
## Level 14: 80 nodes to be scored (164 eliminated genes)
##
## Level 13: 159 nodes to be scored (571 eliminated genes)
##
## Level 12: 255 nodes to be scored (1339 eliminated genes)
##
## Level 11: 410 nodes to be scored (2934 eliminated genes)
##
## Level 10: 585 nodes to be scored (4171 eliminated genes)
##
## Level 9: 744 nodes to be scored (5750 eliminated genes)
##
## Level 8: 770 nodes to be scored (7136 eliminated genes)
##
## Level 7: 787 nodes to be scored (8242 eliminated genes)
##
## Level 6: 695 nodes to be scored (8991 eliminated genes)
##
## Level 5: 449 nodes to be scored (9448 eliminated genes)
##
## Level 4: 241 nodes to be scored (9751 eliminated genes)
##
## Level 3: 99 nodes to be scored (9903 eliminated genes)
##
## Level 2: 22 nodes to be scored (9977 eliminated genes)
##
## Level 1: 1 nodes to be scored (10068 eliminated genes)
go_table <- GenTable(go_data, weightFisher = go_test,
orderBy = "weightFisher", ranksOf = "weightFisher",
topNodes = sum(score(go_test) < .01))
go_table
## GO.ID Term Annotated
## 1 GO:0006953 acute-phase response 27
## 2 GO:0030449 regulation of complement activation 34
## 3 GO:0055114 oxidation-reduction process 712
## 4 GO:0010951 negative regulation of endopeptidase act... 130
## 5 GO:0070328 triglyceride homeostasis 24
## 6 GO:0006958 complement activation, classical pathway 23
## 7 GO:0006536 glutamate metabolic process 24
## 8 GO:0017187 peptidyl-glutamic acid carboxylation 7
## 9 GO:0015721 bile acid and bile salt transport 19
## 10 GO:0042632 cholesterol homeostasis 53
## 11 GO:0006957 complement activation, alternative pathw... 11
## 12 GO:1904478 regulation of intestinal absorption 5
## 13 GO:0034378 chylomicron assembly 8
## 14 GO:0006641 triglyceride metabolic process 65
## 15 GO:0006695 cholesterol biosynthetic process 58
## 16 GO:0007597 blood coagulation, intrinsic pathway 14
## 17 GO:0031639 plasminogen activation 20
## 18 GO:0036498 IRE1-mediated unfolded protein response 62
## 19 GO:0043691 reverse cholesterol transport 15
## 20 GO:0010873 positive regulation of cholesterol ester... 7
## 21 GO:0019373 epoxygenase P450 pathway 7
## 22 GO:0002576 platelet degranulation 106
## 23 GO:0090110 cargo loading into COPII-coated vesicle 12
## 24 GO:0033700 phospholipid efflux 8
## 25 GO:0008202 steroid metabolic process 201
## 26 GO:0033344 cholesterol efflux 29
## 27 GO:0032782 bile acid secretion 5
## 28 GO:0051715 cytolysis in other organism 5
## 29 GO:0015942 formate metabolic process 5
## 30 GO:0006572 tyrosine catabolic process 5
## 31 GO:0008203 cholesterol metabolic process 104
## 32 GO:0019216 regulation of lipid metabolic process 278
## 33 GO:0006805 xenobiotic metabolic process 62
## 34 GO:0070508 cholesterol import 9
## 35 GO:0009071 serine family amino acid catabolic proce... 9
## 36 GO:0034372 very-low-density lipoprotein particle re... 9
## 37 GO:0034374 low-density lipoprotein particle remodel... 9
## 38 GO:0051918 negative regulation of fibrinolysis 9
## 39 GO:0006898 receptor-mediated endocytosis 207
## 40 GO:0034375 high-density lipoprotein particle remode... 14
## 41 GO:0017144 drug metabolic process 20
## 42 GO:0072329 monocarboxylic acid catabolic process 99
## 43 GO:0034384 high-density lipoprotein particle cleara... 10
## 44 GO:0001867 complement activation, lectin pathway 6
## 45 GO:0006207 'de novo' pyrimidine nucleobase biosynth... 6
## 46 GO:0034371 chylomicron remodeling 6
## 47 GO:0051917 regulation of fibrinolysis 12
## 48 GO:0006559 L-phenylalanine catabolic process 11
## 49 GO:0044270 cellular nitrogen compound catabolic pro... 421
## 50 GO:0030195 negative regulation of blood coagulation 38
## 51 GO:0042157 lipoprotein metabolic process 112
## 52 GO:0000098 sulfur amino acid catabolic process 7
## 53 GO:0051006 positive regulation of lipoprotein lipas... 7
## 54 GO:0008228 opsonization 7
## 55 GO:0001935 endothelial cell proliferation 94
## 56 GO:0045723 positive regulation of fatty acid biosyn... 12
## 57 GO:0045717 negative regulation of fatty acid biosyn... 12
## 58 GO:0050714 positive regulation of protein secretion 147
## 59 GO:0019835 cytolysis 23
## 60 GO:0006699 bile acid biosynthetic process 25
## 61 GO:0007595 lactation 26
## 62 GO:0000050 urea cycle 8
## 63 GO:0034379 very-low-density lipoprotein particle as... 8
## 64 GO:0060192 negative regulation of lipase activity 14
## 65 GO:0043401 steroid hormone mediated signaling pathw... 155
## 66 GO:0006534 cysteine metabolic process 11
## 67 GO:0003071 renal system process involved in regulat... 12
## 68 GO:0030522 intracellular receptor signaling pathway 224
## 69 GO:0007200 phospholipase C-activating G-protein cou... 36
## 70 GO:0048208 COPII vesicle coating 53
## 71 GO:0046717 acid secretion 51
## 72 GO:0014075 response to amine 21
## 73 GO:0006809 nitric oxide biosynthetic process 54
## 74 GO:0006069 ethanol oxidation 9
## 75 GO:0035999 tetrahydrofolate interconversion 9
## 76 GO:0051957 positive regulation of amino acid transp... 9
## 77 GO:0055091 phospholipid homeostasis 9
## 78 GO:0045471 response to ethanol 82
## 79 GO:0045540 regulation of cholesterol biosynthetic p... 39
## 80 GO:0045907 positive regulation of vasoconstriction 15
## 81 GO:0051004 regulation of lipoprotein lipase activit... 17
## 82 GO:0050994 regulation of lipid catabolic process 37
## 83 GO:1900103 positive regulation of endoplasmic retic... 11
## 84 GO:0008211 glucocorticoid metabolic process 15
## 85 GO:0010898 positive regulation of triglyceride cata... 5
## 86 GO:0002923 regulation of humoral immune response me... 5
## 87 GO:0051005 negative regulation of lipoprotein lipas... 5
## 88 GO:1902023 L-arginine transport 5
## 89 GO:0006526 arginine biosynthetic process 5
## 90 GO:0006548 histidine catabolic process 5
## 91 GO:0042737 drug catabolic process 5
## 92 GO:0030299 intestinal cholesterol absorption 5
## 93 GO:0010269 response to selenium ion 5
## 94 GO:0090277 positive regulation of peptide hormone s... 56
## 95 GO:0016540 protein autoprocessing 10
## 96 GO:0042493 response to drug 268
## 97 GO:0046483 heterocycle metabolic process 4063
## 98 GO:0001889 liver development 106
## 99 GO:0009395 phospholipid catabolic process 25
## 100 GO:0043687 post-translational protein modification 307
## 101 GO:0009914 hormone transport 181
## 102 GO:0036315 cellular response to sterol 12
## 103 GO:0009396 folic acid-containing compound biosynthe... 8
## 104 GO:0032375 negative regulation of cholesterol trans... 6
## 105 GO:0032489 regulation of Cdc42 protein signal trans... 6
## 106 GO:0043031 negative regulation of macrophage activa... 6
## 107 GO:0019530 taurine metabolic process 6
## 108 GO:0060850 regulation of transcription involved in ... 6
## 109 GO:0034382 chylomicron remnant clearance 6
## 110 GO:0046689 response to mercury ion 6
## 111 GO:0042538 hyperosmotic salinity response 6
## 112 GO:0043603 cellular amide metabolic process 767
## 113 GO:0010894 negative regulation of steroid biosynthe... 17
## 114 GO:0055089 fatty acid homeostasis 12
## 115 GO:2000188 regulation of cholesterol homeostasis 12
## 116 GO:0033189 response to vitamin A 12
## 117 GO:0008643 carbohydrate transport 111
## 118 GO:0018279 protein N-linked glycosylation via aspar... 35
## 119 GO:0098869 cellular oxidant detoxification 59
## 120 GO:0016525 negative regulation of angiogenesis 60
## 121 GO:0031100 animal organ regeneration 64
## 122 GO:0043200 response to amino acid 85
## 123 GO:0006541 glutamine metabolic process 20
## 124 GO:0070542 response to fatty acid 60
## 125 GO:0051592 response to calcium ion 85
## 126 GO:0042730 fibrinolysis 19
## 127 GO:0034380 high-density lipoprotein particle assemb... 13
## 128 GO:0010596 negative regulation of endothelial cell ... 36
## 129 GO:0072378 blood coagulation, fibrin clot formation 21
## 130 GO:0033240 positive regulation of cellular amine me... 7
## 131 GO:0001976 neurological system process involved in ... 7
## 132 GO:0038063 collagen-activated tyrosine kinase recep... 7
## 133 GO:0046439 L-cysteine metabolic process 7
## 134 GO:0042167 heme catabolic process 7
## 135 GO:0045346 regulation of MHC class II biosynthetic ... 7
## 136 GO:0070365 hepatocyte differentiation 7
## 137 GO:1901890 positive regulation of cell junction ass... 28
## 138 GO:1900026 positive regulation of substrate adhesio... 29
## 139 GO:0010043 response to zinc ion 26
## 140 GO:0051258 protein polymerization 189
## 141 GO:0014068 positive regulation of phosphatidylinosi... 48
## 142 GO:0019433 triglyceride catabolic process 23
## 143 GO:0051180 vitamin transport 23
## 144 GO:0008209 androgen metabolic process 14
## 145 GO:0006465 signal peptide processing 14
## 146 GO:1903830 magnesium ion transmembrane transport 14
## 147 GO:0010460 positive regulation of heart rate 14
## 148 GO:0001523 retinoid metabolic process 49
## 149 GO:0050728 negative regulation of inflammatory resp... 66
## Significant Expected weightFisher
## 1 14 1.67 1.0e-10
## 2 16 2.10 1.8e-10
## 3 86 44.07 6.5e-10
## 4 27 8.05 7.0e-10
## 5 13 1.49 9.8e-10
## 6 12 1.42 2.1e-09
## 7 12 1.49 3.9e-09
## 8 6 0.43 3.6e-07
## 9 12 1.18 4.2e-07
## 10 17 3.28 5.7e-07
## 11 7 0.68 8.9e-07
## 12 5 0.31 9.0e-07
## 13 6 0.50 1.4e-06
## 14 21 4.02 7.5e-06
## 15 17 3.59 7.8e-06
## 16 7 0.87 7.9e-06
## 17 8 1.24 9.2e-06
## 18 15 3.84 1.1e-05
## 19 7 0.93 1.4e-05
## 20 5 0.43 1.7e-05
## 21 5 0.43 1.7e-05
## 22 19 6.56 2.3e-05
## 23 6 0.74 3.7e-05
## 24 5 0.50 4.3e-05
## 25 52 12.44 5.5e-05
## 26 9 1.80 6.4e-05
## 27 4 0.31 6.9e-05
## 28 4 0.31 6.9e-05
## 29 4 0.31 6.9e-05
## 30 4 0.31 6.9e-05
## 31 29 6.44 8.0e-05
## 32 50 17.21 8.7e-05
## 33 13 3.84 8.8e-05
## 34 5 0.56 9.1e-05
## 35 5 0.56 9.1e-05
## 36 5 0.56 9.1e-05
## 37 5 0.56 9.1e-05
## 38 5 0.56 9.1e-05
## 39 25 12.81 9.4e-05
## 40 6 0.87 0.00011
## 41 9 1.24 0.00017
## 42 12 6.13 0.00017
## 43 5 0.62 0.00017
## 44 4 0.37 0.00020
## 45 4 0.37 0.00020
## 46 4 0.37 0.00020
## 47 8 0.74 0.00023
## 48 5 0.68 0.00030
## 49 30 26.06 0.00030
## 50 16 2.35 0.00041
## 51 16 6.93 0.00042
## 52 4 0.43 0.00044
## 53 4 0.43 0.00044
## 54 4 0.43 0.00044
## 55 13 5.82 0.00048
## 56 5 0.74 0.00049
## 57 5 0.74 0.00049
## 58 20 9.10 0.00068
## 59 11 1.42 0.00073
## 60 8 1.55 0.00073
## 61 7 1.61 0.00079
## 62 4 0.50 0.00083
## 63 4 0.50 0.00083
## 64 7 0.87 0.00089
## 65 16 9.59 0.00089
## 66 6 0.68 0.00089
## 67 5 0.74 0.00089
## 68 19 13.87 0.00090
## 69 8 2.23 0.00126
## 70 10 3.28 0.00133
## 71 11 3.16 0.00139
## 72 5 1.30 0.00142
## 73 7 3.34 0.00142
## 74 4 0.56 0.00143
## 75 4 0.56 0.00143
## 76 4 0.56 0.00143
## 77 4 0.56 0.00143
## 78 13 5.08 0.00147
## 79 9 2.41 0.00157
## 80 5 0.93 0.00160
## 81 10 1.05 0.00209
## 82 11 2.29 0.00209
## 83 5 0.68 0.00213
## 84 4 0.93 0.00214
## 85 3 0.31 0.00215
## 86 3 0.31 0.00215
## 87 3 0.31 0.00215
## 88 3 0.31 0.00215
## 89 3 0.31 0.00215
## 90 3 0.31 0.00215
## 91 3 0.31 0.00215
## 92 3 0.31 0.00215
## 93 3 0.31 0.00215
## 94 10 3.47 0.00222
## 95 4 0.62 0.00226
## 96 31 16.59 0.00257
## 97 213 251.50 0.00331
## 98 17 6.56 0.00332
## 99 6 1.55 0.00336
## 100 32 19.00 0.00365
## 101 23 11.20 0.00370
## 102 4 0.74 0.00381
## 103 3 0.50 0.00382
## 104 3 0.37 0.00410
## 105 3 0.37 0.00410
## 106 3 0.37 0.00410
## 107 3 0.37 0.00410
## 108 3 0.37 0.00410
## 109 3 0.37 0.00410
## 110 3 0.37 0.00410
## 111 3 0.37 0.00410
## 112 48 47.48 0.00414
## 113 5 1.05 0.00481
## 114 4 0.74 0.00482
## 115 4 0.74 0.00482
## 116 4 0.74 0.00482
## 117 15 6.87 0.00488
## 118 7 2.17 0.00495
## 119 9 3.65 0.00502
## 120 10 3.71 0.00573
## 121 10 3.96 0.00576
## 122 13 5.26 0.00624
## 123 5 1.24 0.00636
## 124 8 3.71 0.00659
## 125 11 5.26 0.00660
## 126 11 1.18 0.00663
## 127 4 0.80 0.00663
## 128 5 2.23 0.00665
## 129 10 1.30 0.00666
## 130 3 0.43 0.00685
## 131 3 0.43 0.00685
## 132 3 0.43 0.00685
## 133 3 0.43 0.00685
## 134 3 0.43 0.00685
## 135 3 0.43 0.00685
## 136 3 0.43 0.00685
## 137 4 1.73 0.00685
## 138 6 1.80 0.00767
## 139 6 1.61 0.00790
## 140 11 11.70 0.00817
## 141 8 2.97 0.00861
## 142 9 1.42 0.00862
## 143 7 1.42 0.00872
## 144 4 0.87 0.00884
## 145 4 0.87 0.00884
## 146 4 0.87 0.00884
## 147 4 0.87 0.00884
## 148 11 3.03 0.00948
## 149 12 4.09 0.00949
sig.genes <- sigGenes(go_data)
# Make table
write.csv(go_table, "../data/GO_conserved_tissue_specific_liver.csv", quote = FALSE, row.names = FALSE)
goresults <- sapply(go_table$GO.ID, function(x)
{
genes<-genesInTerm(go_data, x)
genes[[1]][genes[[1]] %in% sig.genes]
})
goresults["GO:1904478"] # regulation of intestinal absorption "ENSG00000110243" "ENSG00000158874"
## $`GO:1904478`
## [1] "ENSG00000005471" "ENSG00000105697" "ENSG00000110243" "ENSG00000118137"
## [5] "ENSG00000158874"
goresults["GO:0006641"] # triglyceride metabolic process "ENSG00000091583" "ENSG00000110243" "ENSG00000158874"
## $`GO:0006641`
## [1] "ENSG00000021826" "ENSG00000025434" "ENSG00000073060"
## [4] "ENSG00000083807" "ENSG00000084674" "ENSG00000091583"
## [7] "ENSG00000110243" "ENSG00000110245" "ENSG00000112293"
## [10] "ENSG00000114771" "ENSG00000118137" "ENSG00000119927"
## [13] "ENSG00000121691" "ENSG00000125629" "ENSG00000130649"
## [16] "ENSG00000151365" "ENSG00000158874" "ENSG00000163586"
## [19] "ENSG00000166035" "ENSG00000169174" "ENSG00000186480"
goresults["GO:0015721"] # bile acid and bile salt transport "ENSG00000163631" "ENSG00000186350"
## $`GO:0015721`
## [1] "ENSG00000005471" "ENSG00000012504" "ENSG00000083807"
## [4] "ENSG00000103569" "ENSG00000108846" "ENSG00000134538"
## [7] "ENSG00000140396" "ENSG00000163631" "ENSG00000163959"
## [10] "ENSG00000172345" "ENSG00000186350" "ENSG00000214530"
# Matrix for which are in the test set
true_false <- rownames(cpm_12184) %in% combine_lungs$x
true_false <- as.numeric(true_false)
# Merge ENSG with true/false
test_gene <- as.vector(true_false)
names(test_gene) <- rownames(cpm_12184)
# Run topGO
go_data <- new("topGOdata",
ontology = "BP",
allGenes = test_gene,
geneSel = function(allScore){
return(allScore > 0.01)
},
nodeSize = 5,
annotationFun = annFUN.org,
mapping = "org.Hs.eg.db",
ID = "ensembl")
##
## Building most specific GOs .....
## ( 10269 GO terms found. )
##
## Build GO DAG topology ..........
## ( 14352 GO terms and 33417 relations. )
##
## Annotating nodes ...............
## ( 10194 genes annotated to the GO terms. )
# Perform enrichment test
go_test <- runTest(go_data, algorithm = "weight01", statistic = "fisher")
##
## -- Weight01 Algorithm --
##
## the algorithm is scoring 4619 nontrivial nodes
## parameters:
## test statistic: fisher
##
## Level 19: 1 nodes to be scored (0 eliminated genes)
##
## Level 18: 2 nodes to be scored (0 eliminated genes)
##
## Level 17: 8 nodes to be scored (17 eliminated genes)
##
## Level 16: 15 nodes to be scored (22 eliminated genes)
##
## Level 15: 38 nodes to be scored (50 eliminated genes)
##
## Level 14: 86 nodes to be scored (173 eliminated genes)
##
## Level 13: 140 nodes to be scored (560 eliminated genes)
##
## Level 12: 199 nodes to be scored (1308 eliminated genes)
##
## Level 11: 321 nodes to be scored (2751 eliminated genes)
##
## Level 10: 465 nodes to be scored (3773 eliminated genes)
##
## Level 9: 604 nodes to be scored (5319 eliminated genes)
##
## Level 8: 661 nodes to be scored (6769 eliminated genes)
##
## Level 7: 717 nodes to be scored (8071 eliminated genes)
##
## Level 6: 620 nodes to be scored (8941 eliminated genes)
##
## Level 5: 398 nodes to be scored (9420 eliminated genes)
##
## Level 4: 227 nodes to be scored (9742 eliminated genes)
##
## Level 3: 96 nodes to be scored (9898 eliminated genes)
##
## Level 2: 20 nodes to be scored (9976 eliminated genes)
##
## Level 1: 1 nodes to be scored (10069 eliminated genes)
go_table <- GenTable(go_data, weightFisher = go_test,
orderBy = "weightFisher", ranksOf = "weightFisher",
topNodes = sum(score(go_test) < .01))
go_table
## GO.ID Term Annotated
## 1 GO:0042102 positive regulation of T cell proliferat... 54
## 2 GO:0045730 respiratory burst 20
## 3 GO:0043312 neutrophil degranulation 368
## 4 GO:0032729 positive regulation of interferon-gamma ... 31
## 5 GO:0070125 mitochondrial translational elongation 71
## 6 GO:0007166 cell surface receptor signaling pathway 1748
## 7 GO:0043129 surfactant homeostasis 7
## 8 GO:0045579 positive regulation of B cell differenti... 13
## 9 GO:0002250 adaptive immune response 204
## 10 GO:0044267 cellular protein metabolic process 3534
## 11 GO:0045087 innate immune response 521
## 12 GO:0022617 extracellular matrix disassembly 52
## 13 GO:0070374 positive regulation of ERK1 and ERK2 cas... 108
## 14 GO:0050852 T cell receptor signaling pathway 136
## 15 GO:0006635 fatty acid beta-oxidation 61
## 16 GO:0090023 positive regulation of neutrophil chemot... 16
## 17 GO:0042554 superoxide anion generation 18
## 18 GO:0070126 mitochondrial translational termination 72
## 19 GO:0009725 response to hormone 607
## 20 GO:0043407 negative regulation of MAP kinase activi... 56
## 21 GO:0009083 branched-chain amino acid catabolic proc... 19
## 22 GO:2001185 regulation of CD8-positive, alpha-beta T... 5
## 23 GO:0001666 response to hypoxia 263
## 24 GO:0031295 T cell costimulation 41
## 25 GO:0007585 respiratory gaseous exchange 41
## 26 GO:0000302 response to reactive oxygen species 158
## 27 GO:0060333 interferon-gamma-mediated signaling path... 69
## 28 GO:1903039 positive regulation of leukocyte cell-ce... 132
## 29 GO:2000483 negative regulation of interleukin-8 sec... 6
## 30 GO:0030889 negative regulation of B cell proliferat... 13
## 31 GO:0002407 dendritic cell chemotaxis 13
## 32 GO:0032743 positive regulation of interleukin-2 pro... 23
## 33 GO:0061154 endothelial tube morphogenesis 14
## 34 GO:0030098 lymphocyte differentiation 195
## 35 GO:0030815 negative regulation of cAMP metabolic pr... 20
## 36 GO:0071871 response to epinephrine 11
## 37 GO:0002381 immunoglobulin production involved in im... 29
## 38 GO:1904994 regulation of leukocyte adhesion to vasc... 7
## 39 GO:0032324 molybdopterin cofactor biosynthetic proc... 7
## 40 GO:0032715 negative regulation of interleukin-6 pro... 22
## 41 GO:2000406 positive regulation of T cell migration 15
## 42 GO:0006968 cellular defense response 26
## 43 GO:2000727 positive regulation of cardiac muscle ce... 16
## 44 GO:0031581 hemidesmosome assembly 8
## 45 GO:0035810 positive regulation of urine volume 8
## 46 GO:0034599 cellular response to oxidative stress 209
## 47 GO:0030279 negative regulation of ossification 43
## 48 GO:0043011 myeloid dendritic cell differentiation 9
## 49 GO:0001771 immunological synapse formation 9
## 50 GO:0019370 leukotriene biosynthetic process 9
## 51 GO:0045860 positive regulation of protein kinase ac... 364
## 52 GO:0030168 platelet activation 111
## 53 GO:0046633 alpha-beta T cell proliferation 18
## 54 GO:0043372 positive regulation of CD4-positive, alp... 14
## 55 GO:0031589 cell-substrate adhesion 245
## 56 GO:0033138 positive regulation of peptidyl-serine p... 59
## 57 GO:0043304 regulation of mast cell degranulation 19
## 58 GO:0042832 defense response to protozoan 10
## 59 GO:0046641 positive regulation of alpha-beta T cell... 10
## 60 GO:0031330 negative regulation of cellular cataboli... 158
## 61 GO:0001541 ovarian follicle development 32
## 62 GO:0071356 cellular response to tumor necrosis fact... 188
## 63 GO:0030593 neutrophil chemotaxis 42
## 64 GO:0035987 endodermal cell differentiation 33
## 65 GO:0008544 epidermis development 161
## 66 GO:0071361 cellular response to ethanol 11
## 67 GO:0045589 regulation of regulatory T cell differen... 11
## 68 GO:0006954 inflammatory response 403
## 69 GO:0031532 actin cytoskeleton reorganization 76
## 70 GO:0002696 positive regulation of leukocyte activat... 191
## 71 GO:0050853 B cell receptor signaling pathway 34
## 72 GO:2000352 negative regulation of endothelial cell ... 22
## 73 GO:0043552 positive regulation of phosphatidylinosi... 22
## 74 GO:0050731 positive regulation of peptidyl-tyrosine... 96
## 75 GO:0032623 interleukin-2 production 45
## 76 GO:0032633 interleukin-4 production 17
## 77 GO:0030819 positive regulation of cAMP biosynthetic... 33
## 78 GO:0050710 negative regulation of cytokine secretio... 36
## 79 GO:0038083 peptidyl-tyrosine autophosphorylation 37
## 80 GO:0050777 negative regulation of immune response 81
## 81 GO:0042493 response to drug 268
## 82 GO:0030214 hyaluronan catabolic process 13
## 83 GO:0051131 chaperone-mediated protein complex assem... 13
## Significant Expected weightFisher
## 1 13 1.87 1.5e-06
## 2 7 0.69 2.4e-06
## 3 31 12.74 7.3e-06
## 4 8 1.07 7.5e-06
## 5 11 2.46 2.9e-05
## 6 109 60.53 2.9e-05
## 7 4 0.24 4.6e-05
## 8 5 0.45 5.0e-05
## 9 27 7.06 5.6e-05
## 10 136 122.38 6.3e-05
## 11 41 18.04 6.3e-05
## 12 9 1.80 6.6e-05
## 13 13 3.74 8.7e-05
## 14 15 4.71 0.00010
## 15 12 2.11 0.00012
## 16 5 0.55 0.00015
## 17 5 0.62 0.00016
## 18 10 2.49 0.00017
## 19 30 21.02 0.00020
## 20 8 1.94 0.00032
## 21 5 0.66 0.00038
## 22 3 0.17 0.00039
## 23 21 9.11 0.00044
## 24 7 1.42 0.00046
## 25 7 1.42 0.00046
## 26 12 5.47 0.00054
## 27 9 2.39 0.00058
## 28 24 4.57 0.00066
## 29 3 0.21 0.00076
## 30 4 0.45 0.00079
## 31 4 0.45 0.00079
## 32 5 0.80 0.00097
## 33 4 0.48 0.00107
## 34 25 6.75 0.00109
## 35 4 0.69 0.00119
## 36 3 0.38 0.00119
## 37 3 1.00 0.00120
## 38 3 0.24 0.00130
## 39 3 0.24 0.00130
## 40 5 0.76 0.00141
## 41 4 0.52 0.00142
## 42 5 0.90 0.00175
## 43 4 0.55 0.00201
## 44 3 0.28 0.00203
## 45 3 0.28 0.00203
## 46 15 7.24 0.00261
## 47 5 1.49 0.00294
## 48 3 0.31 0.00296
## 49 3 0.31 0.00296
## 50 3 0.31 0.00296
## 51 25 12.60 0.00303
## 52 11 3.84 0.00324
## 53 6 0.62 0.00344
## 54 3 0.48 0.00349
## 55 16 8.48 0.00396
## 56 7 2.04 0.00408
## 57 4 0.66 0.00410
## 58 3 0.35 0.00412
## 59 3 0.35 0.00412
## 60 10 5.47 0.00435
## 61 5 1.11 0.00451
## 62 12 6.51 0.00487
## 63 9 1.45 0.00508
## 64 5 1.14 0.00517
## 65 11 5.58 0.00519
## 66 3 0.38 0.00552
## 67 3 0.38 0.00552
## 68 31 13.96 0.00566
## 69 9 2.63 0.00578
## 70 29 6.61 0.00608
## 71 8 1.18 0.00608
## 72 4 0.76 0.00631
## 73 4 0.76 0.00631
## 74 9 3.32 0.00645
## 75 8 1.56 0.00668
## 76 4 0.59 0.00679
## 77 4 1.14 0.00716
## 78 7 1.25 0.00846
## 79 5 1.28 0.00848
## 80 11 2.80 0.00869
## 81 17 9.28 0.00909
## 82 3 0.45 0.00909
## 83 3 0.45 0.00909
sig.genes <- sigGenes(go_data)
# Make table
write.csv(go_table, "../data/GO_conserved_tissue_specific_lung.csv", quote = FALSE, row.names = FALSE)
goresults <- sapply(go_table$GO.ID, function(x)
{
genes<-genesInTerm(go_data, x)
genes[[1]][genes[[1]] %in% sig.genes]
})
goresults["GO:0007585"] # respiratory gaseous exchange "ENSG00000168484" "ENSG00000168878"
## $`GO:0007585`
## [1] "ENSG00000014919" "ENSG00000049540" "ENSG00000078401" "ENSG00000100368"
## [5] "ENSG00000133661" "ENSG00000168484" "ENSG00000168878"
goresults["GO:0019372"] # lipoxygenase pathway "ENSG00000012779" "ENSG00000132965"
## $<NA>
## NULL
# Tissue up/downreg
heart_upreg_data <- read.csv("../data/human_specific_upreg_heart_genes.csv", stringsAsFactors = FALSE)
heart_downreg_data <- read.csv("../data/human_specific_downreg_heart_genes.csv", stringsAsFactors = FALSE)
kidney_upreg_data <- read.csv("../data/human_specific_upreg_kidney_genes.csv", stringsAsFactors = FALSE)
kidney_downreg_data <- read.csv("../data/human_specific_downreg_kidney_genes.csv", stringsAsFactors = FALSE)
liver_upreg_data <- read.csv("../data/human_specific_upreg_liver_genes.csv", stringsAsFactors = FALSE)
liver_downreg_data <- read.csv("../data/human_specific_downreg_liver_genes.csv", stringsAsFactors = FALSE)
lung_upreg_data <- read.csv("../data/human_specific_upreg_lung_genes.csv", stringsAsFactors = FALSE)
lung_downreg_data <- read.csv("../data/human_specific_downreg_lung_genes.csv", stringsAsFactors = FALSE)
upreg_heart_ranking <- ranking_in_gtex(heart_upreg_data$x)[,5]
downreg_heart_ranking <- ranking_in_gtex(heart_downreg_data$x)[,5]
upreg_kidney_ranking <- ranking_in_gtex(kidney_upreg_data$x)[,6]
downreg_kidney_ranking <- ranking_in_gtex(kidney_downreg_data$x)[,6]
upreg_liver_ranking <- ranking_in_gtex(liver_upreg_data$x)[,7]
downreg_liver_ranking <- ranking_in_gtex(liver_downreg_data$x)[,7]
upreg_lung_ranking <- ranking_in_gtex(lung_upreg_data$x)[,8]
downreg_lung_ranking <- ranking_in_gtex(lung_downreg_data$x)[,8]
# Heart
upreg_heart_num <- array(1, dim=c(length(upreg_heart_ranking), 1))
heart_cor1 <- cbind(upreg_heart_num, upreg_heart_ranking)
downreg_heart_num <- array(4, dim=c(length(downreg_heart_ranking), 1))
heart_cor2 <- cbind(downreg_heart_num, downreg_heart_ranking)
# Kidney
upreg_kidney_num <- array(1, dim=c(length(upreg_kidney_ranking), 1))
kidney_cor1 <- cbind(upreg_kidney_num, upreg_kidney_ranking)
downreg_kidney_num <- array(4, dim=c(length(downreg_kidney_ranking), 1))
kidney_cor2 <- cbind(downreg_kidney_num, downreg_kidney_ranking)
# Liver
upreg_liver_num <- array(1, dim=c(length(upreg_liver_ranking), 1))
liver_cor1 <- cbind(upreg_liver_num, upreg_liver_ranking)
downreg_liver_num <- array(4, dim=c(length(downreg_liver_ranking), 1))
liver_cor2 <- cbind(downreg_liver_num, downreg_liver_ranking)
# Lung
upreg_lung_num <- array(1, dim=c(length(upreg_lung_ranking), 1))
lung_cor1 <- cbind(upreg_lung_num, upreg_lung_ranking)
downreg_lung_num <- array(4, dim=c(length(downreg_lung_ranking), 1))
lung_cor2 <- cbind(downreg_lung_num, downreg_lung_ranking)
tissue_cor <- rbind(heart_cor1, heart_cor2, kidney_cor1, kidney_cor2, liver_cor1, liver_cor2, lung_cor1, lung_cor2)
# Get the correlation between tissues
cor(tissue_cor[,1], tissue_cor[,2])
## [1] 0.7146797
# How many times does the ranking in our data match the ranking in GTEx?
summary(tissue_cor[,1] == tissue_cor[,2])
## Mode FALSE TRUE
## logical 259 428
1540/(199+1540)
## [1] 0.8855664
428/(259+428)
## [1] 0.6229985
prop.test(c(1540, 428), c(1739, 687))
##
## 2-sample test for equality of proportions with continuity
## correction
##
## data: c(1540, 428) out of c(1739, 687)
## X-squared = 219.98, df = 1, p-value < 2.2e-16
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## 0.2223457 0.3027900
## sample estimates:
## prop 1 prop 2
## 0.8855664 0.6229985