The goal of this script is to perform the main analyses and the supplemental analyses using Cormotif.
library("edgeR")
## Warning: package 'edgeR' was built under R version 3.2.4
## Loading required package: limma
## Warning: package 'limma' was built under R version 3.2.4
library("R.utils")
## Warning: package 'R.utils' was built under R version 3.2.5
## Loading required package: R.oo
## Warning: package 'R.oo' was built under R version 3.2.5
## Loading required package: R.methodsS3
## Warning: package 'R.methodsS3' was built under R version 3.2.3
## R.methodsS3 v1.7.1 (2016-02-15) successfully loaded. See ?R.methodsS3 for help.
## R.oo v1.21.0 (2016-10-30) successfully loaded. See ?R.oo for help.
##
## Attaching package: 'R.oo'
## The following objects are masked from 'package:methods':
##
## getClasses, getMethods
## The following objects are masked from 'package:base':
##
## attach, detach, gc, load, save
## R.utils v2.4.0 (2016-09-13) successfully loaded. See ?R.utils for help.
##
## Attaching package: 'R.utils'
## The following object is masked from 'package:utils':
##
## timestamp
## The following objects are masked from 'package:base':
##
## cat, commandArgs, getOption, inherits, isOpen, parse, warnings
library("plyr")
## Warning: package 'plyr' was built under R version 3.2.5
library("limma")
library("Biobase")
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following object is masked from 'package:limma':
##
## plotMA
## The following objects are masked from 'package:stats':
##
## IQR, mad, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, as.vector, cbind,
## colnames, do.call, duplicated, eval, evalq, Filter, Find, get,
## grep, grepl, intersect, is.unsorted, lapply, lengths, Map,
## mapply, match, mget, order, paste, pmax, pmax.int, pmin,
## pmin.int, Position, rank, rbind, Reduce, rownames, sapply,
## setdiff, sort, table, tapply, union, unique, unlist, unsplit
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
library("GEOquery")
## Setting options('download.file.method.GEOquery'='auto')
## Setting options('GEOquery.inmemory.gpl'=FALSE)
##
## Attaching package: 'GEOquery'
## The following object is masked from 'package:R.utils':
##
## gunzip
library("plyr")
#theme_set(theme_bw(base_size = 16))
library("biomaRt")
library("colorfulVennPlot")
## Loading required package: grid
library("VennDiagram")
## Warning: package 'VennDiagram' was built under R version 3.2.5
## Loading required package: futile.logger
## Warning: package 'futile.logger' was built under R version 3.2.5
library("gridExtra")
## Warning: package 'gridExtra' was built under R version 3.2.4
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:Biobase':
##
## combine
## The following object is masked from 'package:BiocGenerics':
##
## combine
library("car")
## Warning: package 'car' was built under R version 3.2.5
##
## Attaching package: 'car'
## The following object is masked from 'package:VennDiagram':
##
## ellipse
library("topGO")
## Loading required package: graph
##
## Attaching package: 'graph'
## The following object is masked from 'package:plyr':
##
## join
## Loading required package: GO.db
## Loading required package: AnnotationDbi
## Warning: package 'AnnotationDbi' was built under R version 3.2.3
## Loading required package: stats4
## Loading required package: IRanges
## Warning: package 'IRanges' was built under R version 3.2.3
## Loading required package: S4Vectors
## Warning: package 'S4Vectors' was built under R version 3.2.3
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:plyr':
##
## rename
##
## Attaching package: 'IRanges'
## The following object is masked from 'package:plyr':
##
## desc
## The following object is masked from 'package:R.oo':
##
## trim
## Loading required package: DBI
## Warning: package 'DBI' was built under R version 3.2.5
##
## Loading required package: SparseM
## Warning: package 'SparseM' was built under R version 3.2.5
##
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
##
## backsolve
## Warning: replacing previous import by 'graph::.__C__dist' when loading
## 'topGO'
##
## groupGOTerms: GOBPTerm, GOMFTerm, GOCCTerm environments built.
##
## Attaching package: 'topGO'
## The following object is masked from 'package:IRanges':
##
## members
## The following object is masked from 'package:grid':
##
## depth
library("rowr")
##
## Attaching package: 'rowr'
## The following object is masked from 'package:plyr':
##
## count
library("Cormotif")
## Loading required package: affy
library("ggfortify")
## Warning: package 'ggfortify' was built under R version 3.2.5
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.2.4
library("dplyr")
## Warning: package 'dplyr' was built under R version 3.2.5
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:rowr':
##
## coalesce, count
## The following object is masked from 'package:AnnotationDbi':
##
## select
## The following objects are masked from 'package:IRanges':
##
## collapse, desc, intersect, setdiff, slice, union
## The following objects are masked from 'package:S4Vectors':
##
## intersect, rename, setdiff, union
## The following object is masked from 'package:graph':
##
## union
## The following object is masked from 'package:car':
##
## recode
## The following object is masked from 'package:gridExtra':
##
## combine
## The following object is masked from 'package:biomaRt':
##
## select
## The following object is masked from 'package:Biobase':
##
## combine
## The following objects are masked from 'package:BiocGenerics':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library("tidyr")
## Warning: package 'tidyr' was built under R version 3.2.5
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:IRanges':
##
## expand
## The following object is masked from 'package:R.utils':
##
## extract
library("statmod")
## Warning: package 'statmod' was built under R version 3.2.5
library("qtlcharts")
## Warning: package 'qtlcharts' was built under R version 3.2.5
library("mygene")
## Loading required package: GenomicFeatures
## Warning: package 'GenomicFeatures' was built under R version 3.2.3
## Loading required package: GenomeInfoDb
## Warning: package 'GenomeInfoDb' was built under R version 3.2.3
## Loading required package: GenomicRanges
## Warning: package 'GenomicRanges' was built under R version 3.2.3
##
## Attaching package: 'GenomicFeatures'
## The following object is masked from 'package:topGO':
##
## genes
##
## Attaching package: 'mygene'
## The following object is masked from 'package:dplyr':
##
## query
## The following object is masked from 'package:biomaRt':
##
## getGene
source("~/Desktop/Endoderm_TC/ashlar-trial/analysis/chunk-options.R")
## Warning: package 'knitr' was built under R version 3.2.5
# Load the data
cpm_cyclicloess_40 <- read.table("~/Desktop/Endoderm_TC/ashlar-trial/data/cpm_cyclicloess_40.txt")
# Setup matrix
mean_tech_reps_matrix <- as.matrix(cpm_cyclicloess_40, nrow=10304, ncol = 40)
mean_tech_reps_matrix[1:10304,1:40] = as.numeric(as.character(mean_tech_reps_matrix[1:10304,1:40]))
colnames(mean_tech_reps_matrix) <- colnames(cpm_cyclicloess_40)
rownames(mean_tech_reps_matrix) <- rownames(cpm_cyclicloess_40)
# Create a factor for the experiments, with the levels ordered such that as.numeric converts them to numbers in a defined manner
# Note: there are 6 human biological replicates and 4 chimp biological replicates
species <- c("H", "H","H","H","H","H", "C", "C","C","C",
"H","H","H","H","H","H", "C", "C","C","C",
"H","H","H","H","H","H", "C", "C","C","C",
"H","H","H","H","H","H", "C", "C","C","C")
day <- c("0", "0","0","0","0","0","0", "0", "0", "0", "1","1","1","1","1","1","1","1", "1","1",
"2", "2","2","2","2","2","2","2","2", "2",
"3", "3","3","3","3","3","3","3", "3", "3")
group_fac <- factor(paste(species, day, sep = "."),
levels = c("H.0", "H.1", "H.2", "H.3",
"C.0", "C.1", "C.2", "C.3"))
groupid <- as.numeric(group_fac)
# Conditions to compare:
# day0, day1, day2, day3,
compid_tran <- data.frame(c1 = c(1, 2, 3, 5, 6, 7),
c2 = c(2, 3, 4, 6, 7, 8))
# Differences during diffentiation
set.seed(1)
cormotif_tran <- cormotiffit(exprs = mean_tech_reps_matrix, groupid = groupid,
compid = compid_tran, K = 5:10, max.iter=500)
[1] "We have run the first 50 iterations for K=5"
[1] "We have run the first 100 iterations for K=5"
[1] "We have run the first 150 iterations for K=5"
[1] "We have run the first 200 iterations for K=5"
[1] "We have run the first 50 iterations for K=6"
[1] "We have run the first 100 iterations for K=6"
[1] "We have run the first 150 iterations for K=6"
[1] "We have run the first 50 iterations for K=7"
[1] "We have run the first 100 iterations for K=7"
[1] "We have run the first 150 iterations for K=7"
[1] "We have run the first 200 iterations for K=7"
[1] "We have run the first 50 iterations for K=8"
[1] "We have run the first 100 iterations for K=8"
[1] "We have run the first 150 iterations for K=8"
[1] "We have run the first 200 iterations for K=8"
[1] "We have run the first 250 iterations for K=8"
[1] "We have run the first 300 iterations for K=8"
[1] "We have run the first 350 iterations for K=8"
[1] "We have run the first 400 iterations for K=8"
[1] "We have run the first 450 iterations for K=8"
[1] "We have run the first 500 iterations for K=8"
[1] "We have run the first 50 iterations for K=9"
[1] "We have run the first 100 iterations for K=9"
[1] "We have run the first 150 iterations for K=9"
[1] "We have run the first 200 iterations for K=9"
[1] "We have run the first 250 iterations for K=9"
[1] "We have run the first 300 iterations for K=9"
[1] "We have run the first 350 iterations for K=9"
[1] "We have run the first 400 iterations for K=9"
[1] "We have run the first 450 iterations for K=9"
[1] "We have run the first 500 iterations for K=9"
[1] "We have run the first 50 iterations for K=10"
[1] "We have run the first 100 iterations for K=10"
[1] "We have run the first 150 iterations for K=10"
[1] "We have run the first 200 iterations for K=10"
[1] "We have run the first 250 iterations for K=10"
[1] "We have run the first 300 iterations for K=10"
[1] "We have run the first 350 iterations for K=10"
[1] "We have run the first 400 iterations for K=10"
[1] "We have run the first 450 iterations for K=10"
cormotif_tran$bestmotif$loglike
[1] -165215.3
Let’s look at another seed
set.seed(12345)
cormotif_tran <- cormotiffit(exprs = mean_tech_reps_matrix, groupid = groupid,
compid = compid_tran, K = 5:10, max.iter=500)
[1] "We have run the first 50 iterations for K=5"
[1] "We have run the first 100 iterations for K=5"
[1] "We have run the first 150 iterations for K=5"
[1] "We have run the first 200 iterations for K=5"
[1] "We have run the first 50 iterations for K=6"
[1] "We have run the first 100 iterations for K=6"
[1] "We have run the first 150 iterations for K=6"
[1] "We have run the first 200 iterations for K=6"
[1] "We have run the first 250 iterations for K=6"
[1] "We have run the first 300 iterations for K=6"
[1] "We have run the first 50 iterations for K=7"
[1] "We have run the first 100 iterations for K=7"
[1] "We have run the first 150 iterations for K=7"
[1] "We have run the first 200 iterations for K=7"
[1] "We have run the first 50 iterations for K=8"
[1] "We have run the first 100 iterations for K=8"
[1] "We have run the first 150 iterations for K=8"
[1] "We have run the first 200 iterations for K=8"
[1] "We have run the first 250 iterations for K=8"
[1] "We have run the first 300 iterations for K=8"
[1] "We have run the first 350 iterations for K=8"
[1] "We have run the first 400 iterations for K=8"
[1] "We have run the first 450 iterations for K=8"
[1] "We have run the first 500 iterations for K=8"
[1] "We have run the first 50 iterations for K=9"
[1] "We have run the first 100 iterations for K=9"
[1] "We have run the first 150 iterations for K=9"
[1] "We have run the first 200 iterations for K=9"
[1] "We have run the first 250 iterations for K=9"
[1] "We have run the first 300 iterations for K=9"
[1] "We have run the first 350 iterations for K=9"
[1] "We have run the first 400 iterations for K=9"
[1] "We have run the first 450 iterations for K=9"
[1] "We have run the first 500 iterations for K=9"
[1] "We have run the first 50 iterations for K=10"
[1] "We have run the first 100 iterations for K=10"
[1] "We have run the first 150 iterations for K=10"
[1] "We have run the first 200 iterations for K=10"
[1] "We have run the first 250 iterations for K=10"
[1] "We have run the first 300 iterations for K=10"
[1] "We have run the first 350 iterations for K=10"
[1] "We have run the first 400 iterations for K=10"
[1] "We have run the first 450 iterations for K=10"
[1] "We have run the first 500 iterations for K=10"
plotIC(cormotif_tran)
plotMotif(cormotif_tran)
cormotif_tran$bestmotif$loglike
[1] -165189
set.seed(12345)
cormotif_tran <- cormotiffit(exprs = mean_tech_reps_matrix, groupid = groupid,
compid = compid_tran, K = 7, max.iter=500)
[1] "We have run the first 50 iterations for K=7"
[1] "We have run the first 100 iterations for K=7"
[1] "We have run the first 150 iterations for K=7"
[1] "We have run the first 200 iterations for K=7"
[1] "We have run the first 250 iterations for K=7"
[1] "We have run the first 300 iterations for K=7"
[1] "We have run the first 350 iterations for K=7"
[1] "We have run the first 400 iterations for K=7"
[1] "We have run the first 450 iterations for K=7"
[1] "We have run the first 500 iterations for K=7"
plotIC(cormotif_tran)
plotMotif(cormotif_tran)
cormotif_tran$bestmotif$loglike
[1] -165217.8
set.seed(12345)
cormotif_tran <- cormotiffit(exprs = mean_tech_reps_matrix, groupid = groupid,
compid = compid_tran, K = 8, max.iter=500)
[1] "We have run the first 50 iterations for K=8"
[1] "We have run the first 100 iterations for K=8"
[1] "We have run the first 150 iterations for K=8"
[1] "We have run the first 200 iterations for K=8"
[1] "We have run the first 250 iterations for K=8"
[1] "We have run the first 300 iterations for K=8"
[1] "We have run the first 350 iterations for K=8"
[1] "We have run the first 400 iterations for K=8"
[1] "We have run the first 450 iterations for K=8"
[1] "We have run the first 500 iterations for K=8"
plotIC(cormotif_tran)
plotMotif(cormotif_tran)
cormotif_tran$bestmotif$loglike
[1] -165217.5
gene_prob_tran <- cormotif_tran$bestmotif$p.post
rownames(gene_prob_tran) <- rownames( cpm_cyclicloess_40)
# Seed 12345
prob_day_1 <- rownames(gene_prob_tran[(gene_prob_tran[,1] <0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]<0.5),])
length(prob_day_1)
[1] 3786
#3786
prob_day_2 <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] < 0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6 ]>0.5),])
length(prob_day_2)
[1] 616
#616
prob_day_3 <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]<0.5),])
length(prob_day_3)
[1] 1024
#1024
prob_day_4 <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] >0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_4)
[1] 97
# 97
prob_day_5 <- rownames(gene_prob_tran[(gene_prob_tran[,1] < 0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] >0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_5)
[1] 654
#654
prob_day_6 <- rownames(gene_prob_tran[(gene_prob_tran[,1] <0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] >0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_6)
[1] 215
#215
prob_day_7 <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] >0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_7)
[1] 223
#223
prob_day_8 <- rownames(gene_prob_tran[(gene_prob_tran[,1] <0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6]<0.5),])
length(prob_day_8)
[1] 1157
#1157
The seed number with the greatest log likelihood is seed # 66 for K = 8 and #4 for K = 7. It should be noted that the log likehood values are relatively similar.
# Re-run Cormotif with the best seed
compid_tran <- data.frame(c1 = c(1, 2, 3, 5, 6, 7),
c2 = c(2, 3, 4, 6, 7, 8))
# Best seed when K = 7
set.seed(4)
cormotif_tran <- cormotiffit(exprs = mean_tech_reps_matrix, groupid = groupid,
compid = compid_tran, K = 7, max.iter=500)
[1] "We have run the first 50 iterations for K=7"
[1] "We have run the first 100 iterations for K=7"
[1] "We have run the first 150 iterations for K=7"
[1] "We have run the first 200 iterations for K=7"
[1] "We have run the first 250 iterations for K=7"
cormotif_tran$bestmotif$loglike
[1] -165185.8
gene_prob_tran <- cormotif_tran$bestmotif$p.post
rownames(gene_prob_tran) <- rownames( cpm_cyclicloess_40)
plotIC(cormotif_tran)
plotMotif(cormotif_tran)
dim(gene_prob_tran)
[1] 10304 6
# Test degree of conservation- posterior probability difference between the humans and the chimps within a motif
# 0.3 (7734 genes)
length(rownames(gene_prob_tran[(abs(gene_prob_tran[,1] - gene_prob_tran[,4]) <= 0.3) & (abs(gene_prob_tran[,2] - gene_prob_tran[,5]) <= 0.3) & (abs(gene_prob_tran[,3] - gene_prob_tran[,6]) <= 0.3), ]))
[1] 7734
# 0.2 (6663 genes)
length(rownames(gene_prob_tran[(abs(gene_prob_tran[,1] - gene_prob_tran[,4]) <= 0.2) & (abs(gene_prob_tran[,2] - gene_prob_tran[,5]) <= 0.2) & (abs(gene_prob_tran[,3] - gene_prob_tran[,6]) <= 0.2), ]))
[1] 6663
# 0.1 (4846 genes)
length(rownames(gene_prob_tran[(abs(gene_prob_tran[,1] - gene_prob_tran[,4]) <= 0.1) & (abs(gene_prob_tran[,2] - gene_prob_tran[,5]) <= 0.1) & (abs(gene_prob_tran[,3] - gene_prob_tran[,6]) <= 0.1), ]))
[1] 4846
prob_day_1 <- rownames(gene_prob_tran[(gene_prob_tran[,1] <0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]<0.5),])
length(prob_day_1)
[1] 3795
write.table(prob_day_1, file="~/Desktop/Endoderm_TC/ashlar-trial/data/prob_cluster_1_K7.txt")
#3795
prob_day_2 <- rownames(gene_prob_tran[(gene_prob_tran[,1] <0.5 & gene_prob_tran[,2] > 0.5 & gene_prob_tran[,3] < 0.5 & gene_prob_tran[,4] < 0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6 ] < 0.5),])
length(prob_day_2)
[1] 1144
write.table(prob_day_2, file="~/Desktop/Endoderm_TC/ashlar-trial/data/prob_cluster_2_K7.txt")
#1144
prob_day_3 <- rownames(gene_prob_tran[(gene_prob_tran[,1] < 0.5 & gene_prob_tran[,2] < 0.5 & gene_prob_tran[,3] > 0.5 & gene_prob_tran[,4] < 0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_3)
[1] 226
write.table(prob_day_3, file="~/Desktop/Endoderm_TC/ashlar-trial/data/prob_cluster_3_K7.txt")
#226
prob_day_4 <- rownames(gene_prob_tran[(gene_prob_tran[,1] > 0.5 & gene_prob_tran[,2] > 0.5 & gene_prob_tran[,3] < 0.5 & gene_prob_tran[,4] > 0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6] < 0.5),])
length(prob_day_4)
[1] 680
write.table(prob_day_4, file="~/Desktop/Endoderm_TC/ashlar-trial/data/prob_cluster_4_K7.txt")
# 680
prob_day_5 <- rownames(gene_prob_tran[(gene_prob_tran[,1] > 0.5 & gene_prob_tran[,2] < 0.5 & gene_prob_tran[,3] < 0.5 & gene_prob_tran[,4] > 0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6] < 0.5),])
length(prob_day_5)
[1] 1035
write.table(prob_day_5, file="~/Desktop/Endoderm_TC/ashlar-trial/data/prob_cluster_5_K7.txt")
#1035
prob_day_6 <- rownames(gene_prob_tran[(gene_prob_tran[,1] < 0.5 & gene_prob_tran[,2] > 0.5 & gene_prob_tran[,3] > 0.5 & gene_prob_tran[,4] < 0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6] > 0.5),])
length(prob_day_6)
[1] 618
write.table(prob_day_6, file="~/Desktop/Endoderm_TC/ashlar-trial/data/prob_cluster_6_K7.txt")
#618
prob_day_7 <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] > 0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_7)
[1] 371
write.table(prob_day_7, file="~/Desktop/Endoderm_TC/ashlar-trial/data/prob_cluster_7_K7.txt")
#371
# Best seed when K = 8
set.seed(66)
cormotif_tran <- cormotiffit(exprs = mean_tech_reps_matrix, groupid = groupid,
compid = compid_tran, K = 5:10, max.iter=500)
[1] "We have run the first 50 iterations for K=5"
[1] "We have run the first 100 iterations for K=5"
[1] "We have run the first 150 iterations for K=5"
[1] "We have run the first 200 iterations for K=5"
[1] "We have run the first 50 iterations for K=6"
[1] "We have run the first 100 iterations for K=6"
[1] "We have run the first 150 iterations for K=6"
[1] "We have run the first 200 iterations for K=6"
[1] "We have run the first 50 iterations for K=7"
[1] "We have run the first 100 iterations for K=7"
[1] "We have run the first 150 iterations for K=7"
[1] "We have run the first 200 iterations for K=7"
[1] "We have run the first 250 iterations for K=7"
[1] "We have run the first 300 iterations for K=7"
[1] "We have run the first 350 iterations for K=7"
[1] "We have run the first 400 iterations for K=7"
[1] "We have run the first 450 iterations for K=7"
[1] "We have run the first 50 iterations for K=8"
[1] "We have run the first 100 iterations for K=8"
[1] "We have run the first 150 iterations for K=8"
[1] "We have run the first 200 iterations for K=8"
[1] "We have run the first 250 iterations for K=8"
[1] "We have run the first 300 iterations for K=8"
[1] "We have run the first 350 iterations for K=8"
[1] "We have run the first 400 iterations for K=8"
[1] "We have run the first 450 iterations for K=8"
[1] "We have run the first 500 iterations for K=8"
[1] "We have run the first 50 iterations for K=9"
[1] "We have run the first 100 iterations for K=9"
[1] "We have run the first 150 iterations for K=9"
[1] "We have run the first 200 iterations for K=9"
[1] "We have run the first 250 iterations for K=9"
[1] "We have run the first 300 iterations for K=9"
[1] "We have run the first 350 iterations for K=9"
[1] "We have run the first 50 iterations for K=10"
[1] "We have run the first 100 iterations for K=10"
[1] "We have run the first 150 iterations for K=10"
[1] "We have run the first 200 iterations for K=10"
[1] "We have run the first 250 iterations for K=10"
[1] "We have run the first 300 iterations for K=10"
[1] "We have run the first 350 iterations for K=10"
[1] "We have run the first 400 iterations for K=10"
[1] "We have run the first 450 iterations for K=10"
[1] "We have run the first 500 iterations for K=10"
plotIC(cormotif_tran)
plotMotif(cormotif_tran)
set.seed(66)
cormotif_tran <- cormotiffit(exprs = mean_tech_reps_matrix, groupid = groupid,
compid = compid_tran, K = 8, max.iter=500)
[1] "We have run the first 50 iterations for K=8"
[1] "We have run the first 100 iterations for K=8"
[1] "We have run the first 150 iterations for K=8"
[1] "We have run the first 200 iterations for K=8"
[1] "We have run the first 250 iterations for K=8"
[1] "We have run the first 300 iterations for K=8"
cormotif_tran$bestmotif$loglike
[1] -165165
gene_prob_tran <- cormotif_tran$bestmotif$p.post
rownames(gene_prob_tran) <- rownames( cpm_cyclicloess_40)
plotIC(cormotif_tran)
plotMotif(cormotif_tran)
dim(gene_prob_tran)
[1] 10304 6
# Test degree of conservation for days 0-3: posterior probability difference between the humans and the chimps within a motif
# 0.3 (7795 genes)
length(rownames(gene_prob_tran[(abs(gene_prob_tran[,1] - gene_prob_tran[,4]) <= 0.3) & (abs(gene_prob_tran[,2] - gene_prob_tran[,5]) <= 0.3) & (abs(gene_prob_tran[,3] - gene_prob_tran[,6]) <= 0.3), ]))
[1] 7795
# 0.2 (6740 genes)
length(rownames(gene_prob_tran[(abs(gene_prob_tran[,1] - gene_prob_tran[,4]) <= 0.2) & (abs(gene_prob_tran[,2] - gene_prob_tran[,5]) <= 0.2) & (abs(gene_prob_tran[,3] - gene_prob_tran[,6]) <= 0.2), ]))
[1] 6740
# 0.1 (4877 genes)
length(rownames(gene_prob_tran[(abs(gene_prob_tran[,1] - gene_prob_tran[,4]) <= 0.1) & (abs(gene_prob_tran[,2] - gene_prob_tran[,5]) <= 0.1) & (abs(gene_prob_tran[,3] - gene_prob_tran[,6]) <= 0.1), ]))
[1] 4877
# Test degree of conservation for days 0-2: posterior probability difference between the humans and the chimps within a motif
# 0.3 (9634 genes)
length(rownames(gene_prob_tran[(abs(gene_prob_tran[,1] - gene_prob_tran[,4]) <= 0.3) & (abs(gene_prob_tran[,2] - gene_prob_tran[,5]) <= 0.3) , ]))
[1] 9634
# 0.2 (9076 genes)
length(rownames(gene_prob_tran[(abs(gene_prob_tran[,1] - gene_prob_tran[,4]) <= 0.2) & (abs(gene_prob_tran[,2] - gene_prob_tran[,5]) <= 0.2) , ]))
[1] 9076
# 0.1 (7899 genes)
length(rownames(gene_prob_tran[(abs(gene_prob_tran[,1] - gene_prob_tran[,4]) <= 0.1) & (abs(gene_prob_tran[,2] - gene_prob_tran[,5]) <= 0.1) , ]))
[1] 7899
# Pull the genes from seed #66 when K = 8.
prob_day_1 <- rownames(gene_prob_tran[(gene_prob_tran[,1] <0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]<0.5),])
length(prob_day_1)
[1] 3789
#3789
write.table(prob_day_1, file="~/Desktop/Endoderm_TC/ashlar-trial/data/prob_cluster_1_K8.txt")
prob_day_2 <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]<0.5),])
length(prob_day_2)
[1] 1017
#1017
write.table(prob_day_2, file="~/Desktop/Endoderm_TC/ashlar-trial/data/prob_cluster_2_K8.txt")
prob_day_3 <- rownames(gene_prob_tran[(gene_prob_tran[,1] <0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] >0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_3)
[1] 219
#219
write.table(prob_day_3, file="~/Desktop/Endoderm_TC/ashlar-trial/data/prob_cluster_3_K8.txt")
prob_day_4 <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_4)
[1] 187
#187
write.table(prob_day_4, file="~/Desktop/Endoderm_TC/ashlar-trial/data/prob_cluster_4_K8.txt")
prob_day_5 <- rownames(gene_prob_tran[(gene_prob_tran[,1] < 0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] >0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_5)
[1] 620
#620
write.table(prob_day_5, file="~/Desktop/Endoderm_TC/ashlar-trial/data/prob_cluster_5_K8.txt")
prob_day_6 <- rownames(gene_prob_tran[(gene_prob_tran[,1] <0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6]<0.5),])
length(prob_day_6)
[1] 1147
#1147
write.table(prob_day_6, file="~/Desktop/Endoderm_TC/ashlar-trial/data/prob_cluster_6_K8.txt")
prob_day_7 <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6]<0.5),])
length(prob_day_7)
[1] 686
#686
write.table(prob_day_7, file="~/Desktop/Endoderm_TC/ashlar-trial/data/prob_cluster_7_K8.txt")
prob_day_8 <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] >0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_8)
[1] 339
#339
write.table(prob_day_8, file="~/Desktop/Endoderm_TC/ashlar-trial/data/prob_cluster_8_K8.txt")
prob_day_1 <- rownames(gene_prob_tran[(gene_prob_tran[,1] <0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]<0.5),])
length(prob_day_1)
[1] 3789
#3789
prob_day_2 <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]<0.5),])
length(prob_day_2)
[1] 1017
#1017
prob_day_3 <- rownames(gene_prob_tran[(gene_prob_tran[,1] <0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] >0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_3)
[1] 219
#219
prob_day_4 <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_4)
[1] 187
#187
prob_day_5 <- rownames(gene_prob_tran[(gene_prob_tran[,1] < 0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] >0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_5)
[1] 620
#620
prob_day_6 <- rownames(gene_prob_tran[(gene_prob_tran[,1] <0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6]<0.5),])
length(prob_day_6)
[1] 1147
#1147
prob_day_7 <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6]<0.5),])
length(prob_day_7)
[1] 686
#686
prob_day_8 <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] >0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_8)
[1] 339
#339
# Lowest when K = 7
set.seed(54)
cormotif_tran <- cormotiffit(exprs = mean_tech_reps_matrix, groupid = groupid,
compid = compid_tran, K = 7, max.iter=500)
[1] "We have run the first 50 iterations for K=7"
[1] "We have run the first 100 iterations for K=7"
[1] "We have run the first 150 iterations for K=7"
[1] "We have run the first 200 iterations for K=7"
[1] "We have run the first 250 iterations for K=7"
cormotif_tran$bestmotif$loglike
[1] -165303.3
gene_prob_tran <- cormotif_tran$bestmotif$p.post
rownames(gene_prob_tran) <- rownames( cpm_cyclicloess_40)
plotIC(cormotif_tran)
plotMotif(cormotif_tran)
dim(gene_prob_tran)
[1] 10304 6
prob_day_1 <- rownames(gene_prob_tran[(gene_prob_tran[,1] <0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]<0.5),])
length(prob_day_1)
[1] 3802
#3787
prob_day_2 <- rownames(gene_prob_tran[(gene_prob_tran[,1] <0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] >0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_2)
[1] 383
#623
prob_day_3 <- rownames(gene_prob_tran[(gene_prob_tran[,1] <0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] >0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_3)
[1] 237
#227
prob_day_4 <- rownames(gene_prob_tran[(gene_prob_tran[,1] <0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6]<0.5),])
length(prob_day_4)
[1] 949
#1154
prob_day_5 <- rownames(gene_prob_tran[(gene_prob_tran[,1] > 0.5 & gene_prob_tran[,2] > 0.5 & gene_prob_tran[,3] >0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_5)
[1] 401
#324
prob_day_6 <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_6)
[1] 420
#464
prob_day_7 <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]<0.5),])
length(prob_day_7)
[1] 1038
#1060
# Lowest when K = 8
set.seed(69)
cormotif_tran <- cormotiffit(exprs = mean_tech_reps_matrix, groupid = groupid,
compid = compid_tran, K = 8, max.iter=500)
[1] "We have run the first 50 iterations for K=8"
[1] "We have run the first 100 iterations for K=8"
[1] "We have run the first 150 iterations for K=8"
[1] "We have run the first 200 iterations for K=8"
[1] "We have run the first 250 iterations for K=8"
[1] "We have run the first 300 iterations for K=8"
[1] "We have run the first 350 iterations for K=8"
[1] "We have run the first 400 iterations for K=8"
[1] "We have run the first 450 iterations for K=8"
[1] "We have run the first 500 iterations for K=8"
cormotif_tran$bestmotif$loglike
[1] -165298.6
gene_prob_tran <- cormotif_tran$bestmotif$p.post
rownames(gene_prob_tran) <- rownames( cpm_cyclicloess_40)
plotIC(cormotif_tran)
plotMotif(cormotif_tran)
dim(gene_prob_tran)
[1] 10304 6
prob_day_1 <- rownames(gene_prob_tran[(gene_prob_tran[,1] <0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]<0.5),])
length(prob_day_1)
[1] 3795
#3787
prob_day_2 <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] >0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_2)
[1] 370
#328
prob_day_3 <- rownames(gene_prob_tran[(gene_prob_tran[,1] <0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] >0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_3)
[1] 374
#622
prob_day_4 <- rownames(gene_prob_tran[(gene_prob_tran[,1] <0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6]<0.5),])
length(prob_day_4)
[1] 953
#1147
prob_day_5 <- rownames(gene_prob_tran[(gene_prob_tran[,1] < 0.5 & gene_prob_tran[,2] < 0.5 & gene_prob_tran[,3] >0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_5)
[1] 240
#227
prob_day_6 <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]<0.5),])
length(prob_day_6)
[1] 179
#170
prob_day_7 <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]<0.5),])
length(prob_day_7)
[1] 1038
#1060
prob_day_8 <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6] > 0.5),])
length(prob_day_8)
[1] 433
#473
# The only thing we are changing is the compid_tran. Now, we are comparing everything to day 0 rather than the previous day
compid_tran <- data.frame(c1 = c(1, 1, 1, 5, 5, 5),
c2 = c(2, 3, 4, 6, 7, 8))
set.seed(66)
cormotif_tran <- cormotiffit(exprs = mean_tech_reps_matrix, groupid = groupid,
compid = compid_tran, K = 8, max.iter=500)
[1] "We have run the first 50 iterations for K=8"
[1] "We have run the first 100 iterations for K=8"
[1] "We have run the first 150 iterations for K=8"
cormotif_tran$bestmotif$loglike
[1] -188523.9
gene_prob_tran <- cormotif_tran$bestmotif$p.post
rownames(gene_prob_tran) <- rownames( cpm_cyclicloess_40)
plotIC(cormotif_tran)
plotMotif(cormotif_tran)
dim(gene_prob_tran)
[1] 10304 6
# Pull the genes from seed #66 when K = 8.
prob_day_1 <- rownames(gene_prob_tran[(gene_prob_tran[,1] <0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]<0.5),])
length(prob_day_1)
[1] 3446
#3446
prob_day_2 <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] >0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]<0.5),])
length(prob_day_2)
[1] 249
#249
prob_day_3 <- rownames(gene_prob_tran[(gene_prob_tran[,1] <0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] >0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_3)
[1] 601
#601
prob_day_4 <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]<0.5),])
length(prob_day_4)
[1] 449
#449
prob_day_5 <- rownames(gene_prob_tran[(gene_prob_tran[,1] < 0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_5)
[1] 188
#26
prob_day_6 <- rownames(gene_prob_tran[(gene_prob_tran[,1] <0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] >0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6] > 0.5),])
length(prob_day_6)
[1] 1898
#1898
#prob_day_7 <- rownames(gene_prob_tran[(gene_prob_tran[,1] > 0.5 & gene_prob_tran[,1] < 0.8 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,2] < 0.8 & gene_prob_tran[,3] >0.5 & gene_prob_tran[,3] <0.8 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,4] <0.8 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,5] < 0.8 & gene_prob_tran[,6]>0.5 & gene_prob_tran[,6]<0.8),])
#length(prob_day_7)
#2
prob_day_7 <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] >0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6] > 0.5),])
length(prob_day_7)
[1] 2
#2
#prob_day_8 <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.8 & gene_prob_tran[,2] >0.8 & gene_prob_tran[,3] >0.8 & gene_prob_tran[,4] >0.8 & gene_prob_tran[,5] > 0.8 & gene_prob_tran[,6] > 0.8),])
#length(prob_day_8)
prob_day_8 <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] >0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6] > 0.5),])
length(prob_day_8)
[1] 2078
#2169
# Load the data
cpm_cyclicloess_40 <- read.table("~/Desktop/Endoderm_TC/ashlar-trial/data/cpm_cyclicloess_40_pre_norm.txt")
# Setup matrix
mean_tech_reps_matrix <- as.matrix(cpm_cyclicloess_40, nrow=10304, ncol = 40)
mean_tech_reps_matrix[1:10304,1:40] = as.numeric(as.character(mean_tech_reps_matrix[1:10304,1:40]))
colnames(mean_tech_reps_matrix) <- colnames(cpm_cyclicloess_40)
rownames(mean_tech_reps_matrix) <- rownames(cpm_cyclicloess_40)
compid_tran <- data.frame(c1 = c(1, 2, 3, 5, 6, 7),
c2 = c(2, 3, 4, 6, 7, 8))
set.seed(66)
cormotif_tran <- cormotiffit(exprs = mean_tech_reps_matrix, groupid = groupid,
compid = compid_tran, K = 8, max.iter=500)
[1] "We have run the first 50 iterations for K=8"
[1] "We have run the first 100 iterations for K=8"
[1] "We have run the first 150 iterations for K=8"
[1] "We have run the first 200 iterations for K=8"
[1] "We have run the first 250 iterations for K=8"
[1] "We have run the first 300 iterations for K=8"
[1] "We have run the first 350 iterations for K=8"
cormotif_tran$bestmotif$loglike
[1] -166482.3
gene_prob_tran <- cormotif_tran$bestmotif$p.post
rownames(gene_prob_tran) <- rownames( cpm_cyclicloess_40)
plotIC(cormotif_tran)
plotMotif(cormotif_tran)
dim(gene_prob_tran)
[1] 10304 6
# Pull the genes from seed #66 when K = 8.
prob_day_1 <- rownames(gene_prob_tran[(gene_prob_tran[,1] <0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]<0.5),])
length(prob_day_1)
[1] 3540
#3540
prob_day_2 <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]<0.5),])
length(prob_day_2)
[1] 1033
#1033
prob_day_3 <- rownames(gene_prob_tran[(gene_prob_tran[,1] <0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] >0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_3)
[1] 213
#213
prob_day_4 <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] >0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_4)
[1] 100
#100
prob_day_5 <- rownames(gene_prob_tran[(gene_prob_tran[,1] < 0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] >0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_5)
[1] 536
#536
prob_day_6 <- rownames(gene_prob_tran[(gene_prob_tran[,1] <0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6]<0.5),])
length(prob_day_6)
[1] 1067
#1067
prob_day_7 <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6]<0.5),])
length(prob_day_7)
[1] 700
#700
prob_day_8 <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] >0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_8)
[1] 334
#334
# Create a factor for the experiments, with the levels ordered such that as.numeric converts them to numbers in a defined manner
# Note: there are 6 human biological replicates and 4 chimp biological replicates
species <- c("H", "H","H","H","H","H", "C", "C","C","C",
"H","H","H","H","H","H", "C", "C","C","C",
"H","H","H","H","H","H", "C", "C","C","C",
"H","H","H","H","H","H", "C", "C","C","C")
day <- c("0", "0","0","0","0","0","0", "0", "0", "0", "1","1","1","1","1","1","1","1", "1","1",
"2", "2","2","2","2","2","2","2","2", "2",
"3", "3","3","3","3","3","3","3", "3", "3")
group_fac <- factor(paste(species, day, sep = "."),
levels = c(
"H.0", "H.1", "H.2", "H.3", "C.0", "C.1", "C.2", "C.3"))
groupid <- as.numeric(group_fac)
# Conditions to compare:
# day0, day1, day2, day3,
compid_day <- data.frame(c1 = c(1, 2, 3, 4),
c2 = c(5, 6, 7, 8))
#interspecies differences at each day
set.seed(12345)
cormotif_day <- cormotiffit(exprs = mean_tech_reps_matrix, groupid = groupid, compid = compid_day, K = 3:5)
[1] "We have run the first 50 iterations for K=3"
[1] "We have run the first 100 iterations for K=3"
[1] "We have run the first 50 iterations for K=4"
[1] "We have run the first 100 iterations for K=4"
[1] "We have run the first 50 iterations for K=5"
[1] "We have run the first 100 iterations for K=5"
plotIC(cormotif_day)
plotMotif(cormotif_day)
gene_prob_day <- cormotif_day$bestmotif$p.post
rownames(gene_prob_day) <- rownames(cpm_cyclicloess_40)
prob_sp_1 <- rownames(gene_prob_day[(gene_prob_day[,1] < 0.5 & gene_prob_day[,2] <0.5 & gene_prob_day[,3] <0.5 & gene_prob_day[,4] <0.5),])
length(prob_sp_1)
[1] 4365
#4495
prob_sp_2 <- rownames(gene_prob_day[(gene_prob_day[,1] >0.7 & gene_prob_day[,2] >0.9 & gene_prob_day[,3] <0.5 & gene_prob_day[,4] <0.5),])
length(prob_sp_2)
[1] 289
#352
prob_sp_3 <- rownames(gene_prob_day[(gene_prob_day[,1] <0.5 & gene_prob_day[,2] <0.5 & gene_prob_day[,3] > 0.9 & gene_prob_day[,4] > 0.9),])
length(prob_sp_3)
[1] 210
#221
prob_sp_4 <- rownames(gene_prob_day[(gene_prob_day[,1] >0.9& gene_prob_day[,2] >0.9 & gene_prob_day[,3]>0.9 & gene_prob_day[,4] >0.9),])
length(prob_sp_4)
[1] 2645
#2509