Cormotif_avg_log2

Load data
Cormotif analysis (DE probabilities for consecutive days)
- Testing out different Ks
- Using the lowest likelihood seeds (Exploration, not used in the paper)
Cormotif analysis- supplement

The goal of this script is to perform the main analyses and the supplemental analyses using Cormotif.

Load data

library("edgeR")

## Warning: package 'edgeR' was built under R version 3.2.4

## Loading required package: limma

## Warning: package 'limma' was built under R version 3.2.4

library("R.utils")

## Warning: package 'R.utils' was built under R version 3.2.5

## Loading required package: R.oo

## Warning: package 'R.oo' was built under R version 3.2.5

## Loading required package: R.methodsS3

## Warning: package 'R.methodsS3' was built under R version 3.2.3

## R.methodsS3 v1.7.1 (2016-02-15) successfully loaded. See ?R.methodsS3 for help.

## R.oo v1.21.0 (2016-10-30) successfully loaded. See ?R.oo for help.

## 
## Attaching package: 'R.oo'

## The following objects are masked from 'package:methods':
## 
##     getClasses, getMethods

## The following objects are masked from 'package:base':
## 
##     attach, detach, gc, load, save

## R.utils v2.4.0 (2016-09-13) successfully loaded. See ?R.utils for help.

## 
## Attaching package: 'R.utils'

## The following object is masked from 'package:utils':
## 
##     timestamp

## The following objects are masked from 'package:base':
## 
##     cat, commandArgs, getOption, inherits, isOpen, parse, warnings

library("plyr")

## Warning: package 'plyr' was built under R version 3.2.5

library("limma")
library("Biobase")

## Loading required package: BiocGenerics

## Loading required package: parallel

## 
## Attaching package: 'BiocGenerics'

## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB

## The following object is masked from 'package:limma':
## 
##     plotMA

## The following objects are masked from 'package:stats':
## 
##     IQR, mad, xtabs

## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, as.vector, cbind,
##     colnames, do.call, duplicated, eval, evalq, Filter, Find, get,
##     grep, grepl, intersect, is.unsorted, lapply, lengths, Map,
##     mapply, match, mget, order, paste, pmax, pmax.int, pmin,
##     pmin.int, Position, rank, rbind, Reduce, rownames, sapply,
##     setdiff, sort, table, tapply, union, unique, unlist, unsplit

## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.

library("GEOquery")

## Setting options('download.file.method.GEOquery'='auto')

## Setting options('GEOquery.inmemory.gpl'=FALSE)

## 
## Attaching package: 'GEOquery'

## The following object is masked from 'package:R.utils':
## 
##     gunzip

library("plyr")
#theme_set(theme_bw(base_size = 16))
library("biomaRt")
library("colorfulVennPlot")

## Loading required package: grid

library("VennDiagram")

## Warning: package 'VennDiagram' was built under R version 3.2.5

## Loading required package: futile.logger

## Warning: package 'futile.logger' was built under R version 3.2.5

library("gridExtra")

## Warning: package 'gridExtra' was built under R version 3.2.4

## 
## Attaching package: 'gridExtra'

## The following object is masked from 'package:Biobase':
## 
##     combine

## The following object is masked from 'package:BiocGenerics':
## 
##     combine

library("car")

## Warning: package 'car' was built under R version 3.2.5

## 
## Attaching package: 'car'

## The following object is masked from 'package:VennDiagram':
## 
##     ellipse

library("topGO")

## Loading required package: graph

## 
## Attaching package: 'graph'

## The following object is masked from 'package:plyr':
## 
##     join

## Loading required package: GO.db

## Loading required package: AnnotationDbi

## Warning: package 'AnnotationDbi' was built under R version 3.2.3

## Loading required package: stats4

## Loading required package: IRanges

## Warning: package 'IRanges' was built under R version 3.2.3

## Loading required package: S4Vectors

## Warning: package 'S4Vectors' was built under R version 3.2.3

## 
## Attaching package: 'S4Vectors'

## The following object is masked from 'package:plyr':
## 
##     rename

## 
## Attaching package: 'IRanges'

## The following object is masked from 'package:plyr':
## 
##     desc

## The following object is masked from 'package:R.oo':
## 
##     trim

## Loading required package: DBI

## Warning: package 'DBI' was built under R version 3.2.5

##

## Loading required package: SparseM

## Warning: package 'SparseM' was built under R version 3.2.5

## 
## Attaching package: 'SparseM'

## The following object is masked from 'package:base':
## 
##     backsolve

## Warning: replacing previous import by 'graph::.__C__dist' when loading
## 'topGO'

## 
## groupGOTerms:    GOBPTerm, GOMFTerm, GOCCTerm environments built.

## 
## Attaching package: 'topGO'

## The following object is masked from 'package:IRanges':
## 
##     members

## The following object is masked from 'package:grid':
## 
##     depth

library("rowr")

## 
## Attaching package: 'rowr'

## The following object is masked from 'package:plyr':
## 
##     count

library("Cormotif")

## Loading required package: affy

library("ggfortify")

## Warning: package 'ggfortify' was built under R version 3.2.5

## Loading required package: ggplot2

## Warning: package 'ggplot2' was built under R version 3.2.4

library("dplyr")

## Warning: package 'dplyr' was built under R version 3.2.5

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:rowr':
## 
##     coalesce, count

## The following object is masked from 'package:AnnotationDbi':
## 
##     select

## The following objects are masked from 'package:IRanges':
## 
##     collapse, desc, intersect, setdiff, slice, union

## The following objects are masked from 'package:S4Vectors':
## 
##     intersect, rename, setdiff, union

## The following object is masked from 'package:graph':
## 
##     union

## The following object is masked from 'package:car':
## 
##     recode

## The following object is masked from 'package:gridExtra':
## 
##     combine

## The following object is masked from 'package:biomaRt':
## 
##     select

## The following object is masked from 'package:Biobase':
## 
##     combine

## The following objects are masked from 'package:BiocGenerics':
## 
##     combine, intersect, setdiff, union

## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library("tidyr")

## Warning: package 'tidyr' was built under R version 3.2.5

## 
## Attaching package: 'tidyr'

## The following object is masked from 'package:IRanges':
## 
##     expand

## The following object is masked from 'package:R.utils':
## 
##     extract

library("statmod")

## Warning: package 'statmod' was built under R version 3.2.5

library("qtlcharts")

## Warning: package 'qtlcharts' was built under R version 3.2.5

library("mygene")

## Loading required package: GenomicFeatures

## Warning: package 'GenomicFeatures' was built under R version 3.2.3

## Loading required package: GenomeInfoDb

## Warning: package 'GenomeInfoDb' was built under R version 3.2.3

## Loading required package: GenomicRanges

## Warning: package 'GenomicRanges' was built under R version 3.2.3

## 
## Attaching package: 'GenomicFeatures'

## The following object is masked from 'package:topGO':
## 
##     genes

## 
## Attaching package: 'mygene'

## The following object is masked from 'package:dplyr':
## 
##     query

## The following object is masked from 'package:biomaRt':
## 
##     getGene

source("~/Desktop/Endoderm_TC/ashlar-trial/analysis/chunk-options.R")

## Warning: package 'knitr' was built under R version 3.2.5

# Load the data 

cpm_cyclicloess_40 <- read.table("~/Desktop/Endoderm_TC/ashlar-trial/data/cpm_cyclicloess_40.txt")

# Setup matrix
mean_tech_reps_matrix <- as.matrix(cpm_cyclicloess_40, nrow=10304, ncol = 40)

mean_tech_reps_matrix[1:10304,1:40] = as.numeric(as.character(mean_tech_reps_matrix[1:10304,1:40]))
colnames(mean_tech_reps_matrix) <- colnames(cpm_cyclicloess_40)
rownames(mean_tech_reps_matrix) <- rownames(cpm_cyclicloess_40)

Cormotif analysis (DE probabilities for consecutive days)

Testing out different Ks

# Create a factor for the experiments, with the levels ordered such that as.numeric converts them to numbers in a defined manner

# Note: there are 6 human biological replicates and 4 chimp biological replicates

species <- c("H", "H","H","H","H","H", "C", "C","C","C", 
             "H","H","H","H","H","H",  "C", "C","C","C",
             "H","H","H","H","H","H",  "C", "C","C","C",
             "H","H","H","H","H","H",  "C", "C","C","C")
day <- c("0", "0","0","0","0","0","0", "0", "0", "0", "1","1","1","1","1","1","1","1", "1","1",
         "2", "2","2","2","2","2","2","2","2", "2", 
         "3", "3","3","3","3","3","3","3",  "3", "3")

group_fac <- factor(paste(species, day, sep = "."),
                    levels = c("H.0", "H.1", "H.2", "H.3",
                               "C.0", "C.1", "C.2", "C.3"))
groupid <- as.numeric(group_fac)
# Conditions to compare:
# day0, day1, day2, day3, 

compid_tran <- data.frame(c1 = c(1, 2, 3, 5, 6, 7),
                          c2 = c(2, 3, 4, 6, 7, 8))

                         
# Differences during diffentiation

set.seed(1)
cormotif_tran <- cormotiffit(exprs = mean_tech_reps_matrix, groupid = groupid,
                        compid = compid_tran, K = 5:10, max.iter=500)

[1] "We have run the first 50 iterations for K=5"
[1] "We have run the first 100 iterations for K=5"
[1] "We have run the first 150 iterations for K=5"
[1] "We have run the first 200 iterations for K=5"
[1] "We have run the first 50 iterations for K=6"
[1] "We have run the first 100 iterations for K=6"
[1] "We have run the first 150 iterations for K=6"
[1] "We have run the first 50 iterations for K=7"
[1] "We have run the first 100 iterations for K=7"
[1] "We have run the first 150 iterations for K=7"
[1] "We have run the first 200 iterations for K=7"
[1] "We have run the first 50 iterations for K=8"
[1] "We have run the first 100 iterations for K=8"
[1] "We have run the first 150 iterations for K=8"
[1] "We have run the first 200 iterations for K=8"
[1] "We have run the first 250 iterations for K=8"
[1] "We have run the first 300 iterations for K=8"
[1] "We have run the first 350 iterations for K=8"
[1] "We have run the first 400 iterations for K=8"
[1] "We have run the first 450 iterations for K=8"
[1] "We have run the first 500 iterations for K=8"
[1] "We have run the first 50 iterations for K=9"
[1] "We have run the first 100 iterations for K=9"
[1] "We have run the first 150 iterations for K=9"
[1] "We have run the first 200 iterations for K=9"
[1] "We have run the first 250 iterations for K=9"
[1] "We have run the first 300 iterations for K=9"
[1] "We have run the first 350 iterations for K=9"
[1] "We have run the first 400 iterations for K=9"
[1] "We have run the first 450 iterations for K=9"
[1] "We have run the first 500 iterations for K=9"
[1] "We have run the first 50 iterations for K=10"
[1] "We have run the first 100 iterations for K=10"
[1] "We have run the first 150 iterations for K=10"
[1] "We have run the first 200 iterations for K=10"
[1] "We have run the first 250 iterations for K=10"
[1] "We have run the first 300 iterations for K=10"
[1] "We have run the first 350 iterations for K=10"
[1] "We have run the first 400 iterations for K=10"
[1] "We have run the first 450 iterations for K=10"

cormotif_tran$bestmotif$loglike

[1] -165215.3

Let’s look at another seed

set.seed(12345)
cormotif_tran <- cormotiffit(exprs = mean_tech_reps_matrix, groupid = groupid,
                        compid = compid_tran, K = 5:10, max.iter=500)

[1] "We have run the first 50 iterations for K=5"
[1] "We have run the first 100 iterations for K=5"
[1] "We have run the first 150 iterations for K=5"
[1] "We have run the first 200 iterations for K=5"
[1] "We have run the first 50 iterations for K=6"
[1] "We have run the first 100 iterations for K=6"
[1] "We have run the first 150 iterations for K=6"
[1] "We have run the first 200 iterations for K=6"
[1] "We have run the first 250 iterations for K=6"
[1] "We have run the first 300 iterations for K=6"
[1] "We have run the first 50 iterations for K=7"
[1] "We have run the first 100 iterations for K=7"
[1] "We have run the first 150 iterations for K=7"
[1] "We have run the first 200 iterations for K=7"
[1] "We have run the first 50 iterations for K=8"
[1] "We have run the first 100 iterations for K=8"
[1] "We have run the first 150 iterations for K=8"
[1] "We have run the first 200 iterations for K=8"
[1] "We have run the first 250 iterations for K=8"
[1] "We have run the first 300 iterations for K=8"
[1] "We have run the first 350 iterations for K=8"
[1] "We have run the first 400 iterations for K=8"
[1] "We have run the first 450 iterations for K=8"
[1] "We have run the first 500 iterations for K=8"
[1] "We have run the first 50 iterations for K=9"
[1] "We have run the first 100 iterations for K=9"
[1] "We have run the first 150 iterations for K=9"
[1] "We have run the first 200 iterations for K=9"
[1] "We have run the first 250 iterations for K=9"
[1] "We have run the first 300 iterations for K=9"
[1] "We have run the first 350 iterations for K=9"
[1] "We have run the first 400 iterations for K=9"
[1] "We have run the first 450 iterations for K=9"
[1] "We have run the first 500 iterations for K=9"
[1] "We have run the first 50 iterations for K=10"
[1] "We have run the first 100 iterations for K=10"
[1] "We have run the first 150 iterations for K=10"
[1] "We have run the first 200 iterations for K=10"
[1] "We have run the first 250 iterations for K=10"
[1] "We have run the first 300 iterations for K=10"
[1] "We have run the first 350 iterations for K=10"
[1] "We have run the first 400 iterations for K=10"
[1] "We have run the first 450 iterations for K=10"
[1] "We have run the first 500 iterations for K=10"

plotIC(cormotif_tran)

plotMotif(cormotif_tran)

cormotif_tran$bestmotif$loglike

[1] -165189

set.seed(12345)
cormotif_tran <- cormotiffit(exprs = mean_tech_reps_matrix, groupid = groupid,
                        compid = compid_tran, K = 7, max.iter=500)

[1] "We have run the first 50 iterations for K=7"
[1] "We have run the first 100 iterations for K=7"
[1] "We have run the first 150 iterations for K=7"
[1] "We have run the first 200 iterations for K=7"
[1] "We have run the first 250 iterations for K=7"
[1] "We have run the first 300 iterations for K=7"
[1] "We have run the first 350 iterations for K=7"
[1] "We have run the first 400 iterations for K=7"
[1] "We have run the first 450 iterations for K=7"
[1] "We have run the first 500 iterations for K=7"

plotIC(cormotif_tran)

plotMotif(cormotif_tran)

cormotif_tran$bestmotif$loglike

[1] -165217.8

set.seed(12345)
cormotif_tran <- cormotiffit(exprs = mean_tech_reps_matrix, groupid = groupid,
                        compid = compid_tran, K = 8, max.iter=500)

[1] "We have run the first 50 iterations for K=8"
[1] "We have run the first 100 iterations for K=8"
[1] "We have run the first 150 iterations for K=8"
[1] "We have run the first 200 iterations for K=8"
[1] "We have run the first 250 iterations for K=8"
[1] "We have run the first 300 iterations for K=8"
[1] "We have run the first 350 iterations for K=8"
[1] "We have run the first 400 iterations for K=8"
[1] "We have run the first 450 iterations for K=8"
[1] "We have run the first 500 iterations for K=8"

plotIC(cormotif_tran)

plotMotif(cormotif_tran)

cormotif_tran$bestmotif$loglike

[1] -165217.5

gene_prob_tran <- cormotif_tran$bestmotif$p.post
rownames(gene_prob_tran) <- rownames( cpm_cyclicloess_40)

# Seed 12345
prob_day_1  <- rownames(gene_prob_tran[(gene_prob_tran[,1] <0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]<0.5),])
length(prob_day_1)

[1] 3786

#3786

prob_day_2  <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] < 0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6 ]>0.5),])
length(prob_day_2)

[1] 616

#616

prob_day_3  <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]<0.5),])
length(prob_day_3)

[1] 1024

#1024

prob_day_4  <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] >0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_4)

[1] 97

# 97

prob_day_5  <- rownames(gene_prob_tran[(gene_prob_tran[,1] < 0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] >0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_5)

[1] 654

#654

prob_day_6  <- rownames(gene_prob_tran[(gene_prob_tran[,1] <0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] >0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_6)

[1] 215

#215

prob_day_7  <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] >0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_7)

[1] 223

#223

prob_day_8  <- rownames(gene_prob_tran[(gene_prob_tran[,1] <0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6]<0.5),])
length(prob_day_8)

[1] 1157

#1157

The seed number with the greatest log likelihood is seed # 66 for K = 8 and #4 for K = 7. It should be noted that the log likehood values are relatively similar.

# Re-run Cormotif with the best seed

compid_tran <- data.frame(c1 = c(1, 2, 3, 5, 6, 7),
                          c2 = c(2, 3, 4, 6, 7, 8))


# Best seed when K = 7

set.seed(4)
cormotif_tran <- cormotiffit(exprs = mean_tech_reps_matrix, groupid = groupid,
                        compid = compid_tran, K = 7, max.iter=500)

[1] "We have run the first 50 iterations for K=7"
[1] "We have run the first 100 iterations for K=7"
[1] "We have run the first 150 iterations for K=7"
[1] "We have run the first 200 iterations for K=7"
[1] "We have run the first 250 iterations for K=7"

cormotif_tran$bestmotif$loglike

[1] -165185.8

gene_prob_tran <- cormotif_tran$bestmotif$p.post

rownames(gene_prob_tran) <- rownames( cpm_cyclicloess_40)
plotIC(cormotif_tran)

plotMotif(cormotif_tran)

dim(gene_prob_tran)

[1] 10304     6

# Test degree of conservation- posterior probability difference between the humans and the chimps within a motif 

# 0.3 (7734 genes)

length(rownames(gene_prob_tran[(abs(gene_prob_tran[,1] - gene_prob_tran[,4]) <= 0.3) & (abs(gene_prob_tran[,2] - gene_prob_tran[,5]) <= 0.3) & (abs(gene_prob_tran[,3] - gene_prob_tran[,6]) <= 0.3), ]))

[1] 7734

# 0.2 (6663 genes)

length(rownames(gene_prob_tran[(abs(gene_prob_tran[,1] - gene_prob_tran[,4]) <= 0.2) & (abs(gene_prob_tran[,2] - gene_prob_tran[,5]) <= 0.2) & (abs(gene_prob_tran[,3] - gene_prob_tran[,6]) <= 0.2), ]))

[1] 6663

# 0.1 (4846 genes)

length(rownames(gene_prob_tran[(abs(gene_prob_tran[,1] - gene_prob_tran[,4]) <= 0.1) & (abs(gene_prob_tran[,2] - gene_prob_tran[,5]) <= 0.1) & (abs(gene_prob_tran[,3] - gene_prob_tran[,6]) <= 0.1), ]))

[1] 4846

prob_day_1  <- rownames(gene_prob_tran[(gene_prob_tran[,1] <0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]<0.5),])
length(prob_day_1)

[1] 3795

write.table(prob_day_1, file="~/Desktop/Endoderm_TC/ashlar-trial/data/prob_cluster_1_K7.txt")

#3795

prob_day_2  <- rownames(gene_prob_tran[(gene_prob_tran[,1] <0.5 & gene_prob_tran[,2] > 0.5 & gene_prob_tran[,3] < 0.5 & gene_prob_tran[,4] < 0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6 ] < 0.5),])
length(prob_day_2)

[1] 1144

write.table(prob_day_2, file="~/Desktop/Endoderm_TC/ashlar-trial/data/prob_cluster_2_K7.txt")

#1144

prob_day_3  <- rownames(gene_prob_tran[(gene_prob_tran[,1] < 0.5 & gene_prob_tran[,2] < 0.5 & gene_prob_tran[,3] > 0.5 & gene_prob_tran[,4] < 0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_3)

[1] 226

write.table(prob_day_3, file="~/Desktop/Endoderm_TC/ashlar-trial/data/prob_cluster_3_K7.txt")

#226

prob_day_4  <- rownames(gene_prob_tran[(gene_prob_tran[,1] > 0.5 & gene_prob_tran[,2] > 0.5 & gene_prob_tran[,3] < 0.5 & gene_prob_tran[,4] > 0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6] < 0.5),])
length(prob_day_4)

[1] 680

write.table(prob_day_4, file="~/Desktop/Endoderm_TC/ashlar-trial/data/prob_cluster_4_K7.txt")

# 680

prob_day_5  <- rownames(gene_prob_tran[(gene_prob_tran[,1] > 0.5 & gene_prob_tran[,2] < 0.5 & gene_prob_tran[,3] < 0.5 & gene_prob_tran[,4] > 0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6] < 0.5),])
length(prob_day_5)

[1] 1035

write.table(prob_day_5, file="~/Desktop/Endoderm_TC/ashlar-trial/data/prob_cluster_5_K7.txt")

#1035

prob_day_6  <- rownames(gene_prob_tran[(gene_prob_tran[,1] < 0.5 & gene_prob_tran[,2] > 0.5 & gene_prob_tran[,3] > 0.5 & gene_prob_tran[,4] < 0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6] > 0.5),])
length(prob_day_6)

[1] 618

write.table(prob_day_6, file="~/Desktop/Endoderm_TC/ashlar-trial/data/prob_cluster_6_K7.txt")

#618

prob_day_7  <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] > 0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_7)

[1] 371

write.table(prob_day_7, file="~/Desktop/Endoderm_TC/ashlar-trial/data/prob_cluster_7_K7.txt")

#371

# Best seed when K = 8

set.seed(66)
cormotif_tran <- cormotiffit(exprs = mean_tech_reps_matrix, groupid = groupid,
                        compid = compid_tran, K = 5:10, max.iter=500)

[1] "We have run the first 50 iterations for K=5"
[1] "We have run the first 100 iterations for K=5"
[1] "We have run the first 150 iterations for K=5"
[1] "We have run the first 200 iterations for K=5"
[1] "We have run the first 50 iterations for K=6"
[1] "We have run the first 100 iterations for K=6"
[1] "We have run the first 150 iterations for K=6"
[1] "We have run the first 200 iterations for K=6"
[1] "We have run the first 50 iterations for K=7"
[1] "We have run the first 100 iterations for K=7"
[1] "We have run the first 150 iterations for K=7"
[1] "We have run the first 200 iterations for K=7"
[1] "We have run the first 250 iterations for K=7"
[1] "We have run the first 300 iterations for K=7"
[1] "We have run the first 350 iterations for K=7"
[1] "We have run the first 400 iterations for K=7"
[1] "We have run the first 450 iterations for K=7"
[1] "We have run the first 50 iterations for K=8"
[1] "We have run the first 100 iterations for K=8"
[1] "We have run the first 150 iterations for K=8"
[1] "We have run the first 200 iterations for K=8"
[1] "We have run the first 250 iterations for K=8"
[1] "We have run the first 300 iterations for K=8"
[1] "We have run the first 350 iterations for K=8"
[1] "We have run the first 400 iterations for K=8"
[1] "We have run the first 450 iterations for K=8"
[1] "We have run the first 500 iterations for K=8"
[1] "We have run the first 50 iterations for K=9"
[1] "We have run the first 100 iterations for K=9"
[1] "We have run the first 150 iterations for K=9"
[1] "We have run the first 200 iterations for K=9"
[1] "We have run the first 250 iterations for K=9"
[1] "We have run the first 300 iterations for K=9"
[1] "We have run the first 350 iterations for K=9"
[1] "We have run the first 50 iterations for K=10"
[1] "We have run the first 100 iterations for K=10"
[1] "We have run the first 150 iterations for K=10"
[1] "We have run the first 200 iterations for K=10"
[1] "We have run the first 250 iterations for K=10"
[1] "We have run the first 300 iterations for K=10"
[1] "We have run the first 350 iterations for K=10"
[1] "We have run the first 400 iterations for K=10"
[1] "We have run the first 450 iterations for K=10"
[1] "We have run the first 500 iterations for K=10"

plotIC(cormotif_tran)

plotMotif(cormotif_tran)

set.seed(66)
cormotif_tran <- cormotiffit(exprs = mean_tech_reps_matrix, groupid = groupid,
                        compid = compid_tran, K = 8, max.iter=500)

[1] "We have run the first 50 iterations for K=8"
[1] "We have run the first 100 iterations for K=8"
[1] "We have run the first 150 iterations for K=8"
[1] "We have run the first 200 iterations for K=8"
[1] "We have run the first 250 iterations for K=8"
[1] "We have run the first 300 iterations for K=8"

cormotif_tran$bestmotif$loglike

[1] -165165

gene_prob_tran <- cormotif_tran$bestmotif$p.post
rownames(gene_prob_tran) <- rownames( cpm_cyclicloess_40)
plotIC(cormotif_tran)

plotMotif(cormotif_tran)

dim(gene_prob_tran)

[1] 10304     6

# Test degree of conservation for days 0-3: posterior probability difference between the humans and the chimps within a motif 

# 0.3 (7795 genes)

length(rownames(gene_prob_tran[(abs(gene_prob_tran[,1] - gene_prob_tran[,4]) <= 0.3) & (abs(gene_prob_tran[,2] - gene_prob_tran[,5]) <= 0.3) & (abs(gene_prob_tran[,3] - gene_prob_tran[,6]) <= 0.3), ]))

[1] 7795

# 0.2 (6740 genes)

length(rownames(gene_prob_tran[(abs(gene_prob_tran[,1] - gene_prob_tran[,4]) <= 0.2) & (abs(gene_prob_tran[,2] - gene_prob_tran[,5]) <= 0.2) & (abs(gene_prob_tran[,3] - gene_prob_tran[,6]) <= 0.2), ]))

[1] 6740

# 0.1 (4877 genes)

length(rownames(gene_prob_tran[(abs(gene_prob_tran[,1] - gene_prob_tran[,4]) <= 0.1) & (abs(gene_prob_tran[,2] - gene_prob_tran[,5]) <= 0.1) & (abs(gene_prob_tran[,3] - gene_prob_tran[,6]) <= 0.1), ]))

[1] 4877

# Test degree of conservation for days 0-2: posterior probability difference between the humans and the chimps within a motif 

# 0.3 (9634 genes)

length(rownames(gene_prob_tran[(abs(gene_prob_tran[,1] - gene_prob_tran[,4]) <= 0.3) & (abs(gene_prob_tran[,2] - gene_prob_tran[,5]) <= 0.3) , ]))

[1] 9634

# 0.2 (9076 genes)

length(rownames(gene_prob_tran[(abs(gene_prob_tran[,1] - gene_prob_tran[,4]) <= 0.2) & (abs(gene_prob_tran[,2] - gene_prob_tran[,5]) <= 0.2) , ]))

[1] 9076

# 0.1 (7899 genes)

length(rownames(gene_prob_tran[(abs(gene_prob_tran[,1] - gene_prob_tran[,4]) <= 0.1) & (abs(gene_prob_tran[,2] - gene_prob_tran[,5]) <= 0.1) , ]))

[1] 7899

# Pull the genes from seed #66 when K = 8. 

prob_day_1  <- rownames(gene_prob_tran[(gene_prob_tran[,1] <0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]<0.5),])
length(prob_day_1)

[1] 3789

#3789
write.table(prob_day_1, file="~/Desktop/Endoderm_TC/ashlar-trial/data/prob_cluster_1_K8.txt")

prob_day_2  <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]<0.5),])
length(prob_day_2)

[1] 1017

#1017
write.table(prob_day_2, file="~/Desktop/Endoderm_TC/ashlar-trial/data/prob_cluster_2_K8.txt")

prob_day_3  <- rownames(gene_prob_tran[(gene_prob_tran[,1] <0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] >0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_3)

[1] 219

#219
write.table(prob_day_3, file="~/Desktop/Endoderm_TC/ashlar-trial/data/prob_cluster_3_K8.txt")

prob_day_4  <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_4)

[1] 187

#187
write.table(prob_day_4, file="~/Desktop/Endoderm_TC/ashlar-trial/data/prob_cluster_4_K8.txt")

prob_day_5  <- rownames(gene_prob_tran[(gene_prob_tran[,1] < 0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] >0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_5)

[1] 620

#620
write.table(prob_day_5, file="~/Desktop/Endoderm_TC/ashlar-trial/data/prob_cluster_5_K8.txt")

prob_day_6  <- rownames(gene_prob_tran[(gene_prob_tran[,1] <0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6]<0.5),])
length(prob_day_6)

[1] 1147

#1147
write.table(prob_day_6, file="~/Desktop/Endoderm_TC/ashlar-trial/data/prob_cluster_6_K8.txt")

prob_day_7  <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6]<0.5),])
length(prob_day_7)

[1] 686

#686
write.table(prob_day_7, file="~/Desktop/Endoderm_TC/ashlar-trial/data/prob_cluster_7_K8.txt")

prob_day_8  <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] >0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_8)

[1] 339

#339
write.table(prob_day_8, file="~/Desktop/Endoderm_TC/ashlar-trial/data/prob_cluster_8_K8.txt")

prob_day_1  <- rownames(gene_prob_tran[(gene_prob_tran[,1] <0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]<0.5),])
length(prob_day_1)

[1] 3789

#3789

prob_day_2  <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]<0.5),])
length(prob_day_2)

[1] 1017

#1017

prob_day_3  <- rownames(gene_prob_tran[(gene_prob_tran[,1] <0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] >0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_3)

[1] 219

#219

prob_day_4  <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_4)

[1] 187

#187

prob_day_5  <- rownames(gene_prob_tran[(gene_prob_tran[,1] < 0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] >0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_5)

[1] 620

#620

prob_day_6  <- rownames(gene_prob_tran[(gene_prob_tran[,1] <0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6]<0.5),])
length(prob_day_6)

[1] 1147

#1147

prob_day_7  <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6]<0.5),])
length(prob_day_7)

[1] 686

#686

prob_day_8  <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] >0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_8)

[1] 339

#339

Using the lowest likelihood seeds (Exploration, not used in the paper)

# Lowest when K = 7

set.seed(54)
cormotif_tran <- cormotiffit(exprs = mean_tech_reps_matrix, groupid = groupid,
                        compid = compid_tran, K = 7, max.iter=500)

[1] "We have run the first 50 iterations for K=7"
[1] "We have run the first 100 iterations for K=7"
[1] "We have run the first 150 iterations for K=7"
[1] "We have run the first 200 iterations for K=7"
[1] "We have run the first 250 iterations for K=7"

cormotif_tran$bestmotif$loglike

[1] -165303.3

gene_prob_tran <- cormotif_tran$bestmotif$p.post

rownames(gene_prob_tran) <- rownames( cpm_cyclicloess_40)
plotIC(cormotif_tran)

plotMotif(cormotif_tran)

dim(gene_prob_tran)

[1] 10304     6

prob_day_1  <- rownames(gene_prob_tran[(gene_prob_tran[,1] <0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]<0.5),])
length(prob_day_1)

[1] 3802

#3787

prob_day_2  <- rownames(gene_prob_tran[(gene_prob_tran[,1] <0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] >0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_2)

[1] 383

#623

prob_day_3  <- rownames(gene_prob_tran[(gene_prob_tran[,1] <0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] >0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_3)

[1] 237

#227

prob_day_4  <- rownames(gene_prob_tran[(gene_prob_tran[,1] <0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6]<0.5),])
length(prob_day_4)

[1] 949

#1154

prob_day_5  <- rownames(gene_prob_tran[(gene_prob_tran[,1] > 0.5 & gene_prob_tran[,2] > 0.5 & gene_prob_tran[,3] >0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_5)

[1] 401

#324

prob_day_6  <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_6)

[1] 420

#464

prob_day_7  <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]<0.5),])
length(prob_day_7)

[1] 1038

#1060

# Lowest when K = 8
set.seed(69)

cormotif_tran <- cormotiffit(exprs = mean_tech_reps_matrix, groupid = groupid,
                        compid = compid_tran, K = 8, max.iter=500)

[1] "We have run the first 50 iterations for K=8"
[1] "We have run the first 100 iterations for K=8"
[1] "We have run the first 150 iterations for K=8"
[1] "We have run the first 200 iterations for K=8"
[1] "We have run the first 250 iterations for K=8"
[1] "We have run the first 300 iterations for K=8"
[1] "We have run the first 350 iterations for K=8"
[1] "We have run the first 400 iterations for K=8"
[1] "We have run the first 450 iterations for K=8"
[1] "We have run the first 500 iterations for K=8"

cormotif_tran$bestmotif$loglike

[1] -165298.6

gene_prob_tran <- cormotif_tran$bestmotif$p.post

rownames(gene_prob_tran) <- rownames( cpm_cyclicloess_40)
plotIC(cormotif_tran)

plotMotif(cormotif_tran)

dim(gene_prob_tran)

[1] 10304     6

prob_day_1  <- rownames(gene_prob_tran[(gene_prob_tran[,1] <0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]<0.5),])
length(prob_day_1)

[1] 3795

#3787

prob_day_2  <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] >0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_2)

[1] 370

#328

prob_day_3  <- rownames(gene_prob_tran[(gene_prob_tran[,1] <0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] >0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_3)

[1] 374

#622

prob_day_4  <- rownames(gene_prob_tran[(gene_prob_tran[,1] <0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6]<0.5),])
length(prob_day_4)

[1] 953

#1147

prob_day_5  <- rownames(gene_prob_tran[(gene_prob_tran[,1] < 0.5 & gene_prob_tran[,2] < 0.5 & gene_prob_tran[,3] >0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_5)

[1] 240

#227

prob_day_6  <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]<0.5),])
length(prob_day_6)

[1] 179

#170

prob_day_7  <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]<0.5),])
length(prob_day_7)

[1] 1038

#1060

prob_day_8  <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6] > 0.5),])
length(prob_day_8)

[1] 433

#473

Cormotif analysis- supplement

DE probabilities all relative to day 0 rather than the previous day

# The only thing we are changing is the compid_tran. Now, we are comparing everything to day 0 rather than the previous day

compid_tran <- data.frame(c1 = c(1, 1, 1, 5, 5, 5),
                          c2 = c(2, 3, 4, 6, 7, 8))

   

set.seed(66)
cormotif_tran <- cormotiffit(exprs = mean_tech_reps_matrix, groupid = groupid,
                        compid = compid_tran, K = 8, max.iter=500)

[1] "We have run the first 50 iterations for K=8"
[1] "We have run the first 100 iterations for K=8"
[1] "We have run the first 150 iterations for K=8"

cormotif_tran$bestmotif$loglike

[1] -188523.9

gene_prob_tran <- cormotif_tran$bestmotif$p.post

rownames(gene_prob_tran) <- rownames( cpm_cyclicloess_40)
plotIC(cormotif_tran)

plotMotif(cormotif_tran)

dim(gene_prob_tran)

[1] 10304     6

# Pull the genes from seed #66 when K = 8. 


prob_day_1  <- rownames(gene_prob_tran[(gene_prob_tran[,1] <0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]<0.5),])
length(prob_day_1)

[1] 3446

#3446

prob_day_2  <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] >0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]<0.5),])
length(prob_day_2)

[1] 249

#249

prob_day_3  <- rownames(gene_prob_tran[(gene_prob_tran[,1] <0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] >0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_3)

[1] 601

#601

prob_day_4  <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]<0.5),])
length(prob_day_4)

[1] 449

#449

prob_day_5  <- rownames(gene_prob_tran[(gene_prob_tran[,1] < 0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_5)

[1] 188

#26

prob_day_6  <- rownames(gene_prob_tran[(gene_prob_tran[,1] <0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] >0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6] > 0.5),])
length(prob_day_6)

[1] 1898

#1898



#prob_day_7  <- rownames(gene_prob_tran[(gene_prob_tran[,1] > 0.5 & gene_prob_tran[,1] < 0.8 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,2] < 0.8 & gene_prob_tran[,3] >0.5 & gene_prob_tran[,3] <0.8 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,4] <0.8 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,5] < 0.8  & gene_prob_tran[,6]>0.5 & gene_prob_tran[,6]<0.8),])
#length(prob_day_7)
#2


prob_day_7  <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] >0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6] > 0.5),])
length(prob_day_7)

[1] 2

#2

#prob_day_8  <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.8 & gene_prob_tran[,2] >0.8 & gene_prob_tran[,3] >0.8 & gene_prob_tran[,4] >0.8 & gene_prob_tran[,5] > 0.8 & gene_prob_tran[,6] > 0.8),])
#length(prob_day_8)

prob_day_8  <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] >0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6] > 0.5),])
length(prob_day_8)

[1] 2078

#2169

DE probabilities all relative to the previous day but using the data that was combined pre-normalization

# Load the data 

cpm_cyclicloess_40 <- read.table("~/Desktop/Endoderm_TC/ashlar-trial/data/cpm_cyclicloess_40_pre_norm.txt")

# Setup matrix
mean_tech_reps_matrix <- as.matrix(cpm_cyclicloess_40, nrow=10304, ncol = 40)

mean_tech_reps_matrix[1:10304,1:40] = as.numeric(as.character(mean_tech_reps_matrix[1:10304,1:40]))
colnames(mean_tech_reps_matrix) <- colnames(cpm_cyclicloess_40)
rownames(mean_tech_reps_matrix) <- rownames(cpm_cyclicloess_40)

compid_tran <- data.frame(c1 = c(1, 2, 3, 5, 6, 7),
                          c2 = c(2, 3, 4, 6, 7, 8))

set.seed(66)
cormotif_tran <- cormotiffit(exprs = mean_tech_reps_matrix, groupid = groupid,
                        compid = compid_tran, K = 8, max.iter=500)

[1] "We have run the first 50 iterations for K=8"
[1] "We have run the first 100 iterations for K=8"
[1] "We have run the first 150 iterations for K=8"
[1] "We have run the first 200 iterations for K=8"
[1] "We have run the first 250 iterations for K=8"
[1] "We have run the first 300 iterations for K=8"
[1] "We have run the first 350 iterations for K=8"

cormotif_tran$bestmotif$loglike

[1] -166482.3

gene_prob_tran <- cormotif_tran$bestmotif$p.post
rownames(gene_prob_tran) <- rownames( cpm_cyclicloess_40)
plotIC(cormotif_tran)

plotMotif(cormotif_tran)

dim(gene_prob_tran)

[1] 10304     6

# Pull the genes from seed #66 when K = 8. 

prob_day_1  <- rownames(gene_prob_tran[(gene_prob_tran[,1] <0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]<0.5),])
length(prob_day_1)

[1] 3540

#3540

prob_day_2  <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]<0.5),])
length(prob_day_2)

[1] 1033

#1033

prob_day_3  <- rownames(gene_prob_tran[(gene_prob_tran[,1] <0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] >0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_3)

[1] 213

#213

prob_day_4  <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] <0.5 & gene_prob_tran[,3] >0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] < 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_4)

[1] 100

#100

prob_day_5  <- rownames(gene_prob_tran[(gene_prob_tran[,1] < 0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] >0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_5)

[1] 536

#536

prob_day_6  <- rownames(gene_prob_tran[(gene_prob_tran[,1] <0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] <0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6]<0.5),])
length(prob_day_6)

[1] 1067

#1067

prob_day_7  <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] <0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6]<0.5),])
length(prob_day_7)

[1] 700

#700

prob_day_8  <- rownames(gene_prob_tran[(gene_prob_tran[,1] >0.5 & gene_prob_tran[,2] >0.5 & gene_prob_tran[,3] >0.5 & gene_prob_tran[,4] >0.5 & gene_prob_tran[,5] > 0.5 & gene_prob_tran[,6]>0.5),])
length(prob_day_8)

[1] 334

#334

Identifying genes with interspecies differences by day (not used in the paper)

# Create a factor for the experiments, with the levels ordered such that as.numeric converts them to numbers in a defined manner

# Note: there are 6 human biological replicates and 4 chimp biological replicates

species <- c("H", "H","H","H","H","H", "C", "C","C","C", 
             "H","H","H","H","H","H",  "C", "C","C","C",
             "H","H","H","H","H","H",  "C", "C","C","C",
             "H","H","H","H","H","H",  "C", "C","C","C")
day <- c("0", "0","0","0","0","0","0", "0", "0", "0", "1","1","1","1","1","1","1","1", "1","1",
         "2", "2","2","2","2","2","2","2","2", "2", 
         "3", "3","3","3","3","3","3","3",  "3", "3")

group_fac <- factor(paste(species, day, sep = "."),
                    levels = c( 
                               "H.0", "H.1", "H.2", "H.3", "C.0", "C.1", "C.2", "C.3"))
groupid <- as.numeric(group_fac)

# Conditions to compare:
# day0, day1, day2, day3, 
compid_day <- data.frame(c1 = c(1, 2, 3, 4),
                         c2 = c(5, 6, 7, 8))


#interspecies differences at each day
set.seed(12345)
cormotif_day <- cormotiffit(exprs = mean_tech_reps_matrix, groupid = groupid, compid = compid_day, K = 3:5)

[1] "We have run the first 50 iterations for K=3"
[1] "We have run the first 100 iterations for K=3"
[1] "We have run the first 50 iterations for K=4"
[1] "We have run the first 100 iterations for K=4"
[1] "We have run the first 50 iterations for K=5"
[1] "We have run the first 100 iterations for K=5"

plotIC(cormotif_day)

plotMotif(cormotif_day)

gene_prob_day <- cormotif_day$bestmotif$p.post
rownames(gene_prob_day) <- rownames(cpm_cyclicloess_40)


prob_sp_1 <- rownames(gene_prob_day[(gene_prob_day[,1] < 0.5 & gene_prob_day[,2] <0.5 & gene_prob_day[,3] <0.5 & gene_prob_day[,4] <0.5),])
length(prob_sp_1)

[1] 4365

#4495

prob_sp_2 <- rownames(gene_prob_day[(gene_prob_day[,1] >0.7 & gene_prob_day[,2] >0.9 & gene_prob_day[,3] <0.5 & gene_prob_day[,4] <0.5),])
length(prob_sp_2)

[1] 289

#352

prob_sp_3 <- rownames(gene_prob_day[(gene_prob_day[,1] <0.5 & gene_prob_day[,2] <0.5 & gene_prob_day[,3] > 0.9 & gene_prob_day[,4] > 0.9),])
length(prob_sp_3)

[1] 210

#221

prob_sp_4 <- rownames(gene_prob_day[(gene_prob_day[,1] >0.9& gene_prob_day[,2] >0.9 & gene_prob_day[,3]>0.9 & gene_prob_day[,4] >0.9),])
length(prob_sp_4)

[1] 2645

#2509

Cormotif_avg_log2_CPM

Lauren Blake

January 8, 2017