# Background
# Build Gene_motif: one row per row of ENSG_Motif_Number, holding
# (column 1 of ENSG_Motif_Number, looked-up gene name, column 2 of
# ENSG_Motif_Number).
# Lookup table; its ensg and gene columns are used below.
Gene_ID_Conversion <- read.csv("~/Downloads/Gene_ID_Conversion.csv", stringsAsFactors = FALSE)
# All motifs
# The Gene column of this table is the lookup key; presumably column 2 is the
# motif number each gene was assigned to -- TODO confirm against the CSV.
ENSG_Motif_Number <- read.csv("~/Desktop/Endoderm_TC/ashlar-trial/data/ENSG_Motif_Number_TMM_cyclic_loess.csv", stringsAsFactors = FALSE)
# Get Gene name for ENSG- all Motifs
# match() gives, for every ENSG_Motif_Number$Gene, the row of the first equal
# Gene_ID_Conversion$ensg entry, or NA when there is none.
# NOTE(review): unmatched IDs therefore end up with an NA gene name; nothing
# below filters them out -- confirm that is intended.
list_match <- match(ENSG_Motif_Number$Gene, Gene_ID_Conversion$ensg)
get_gene <- as.data.frame(Gene_ID_Conversion$gene[list_match])
# Gene to Motif
# Columns are combined by position; later code refers to them by index only
# (column 2 is the gene name).
Gene_motif <- cbind(ENSG_Motif_Number[,1], get_gene, ENSG_Motif_Number[,2])
# Split Gene_motif into the eight motifs by fixed row position: each motif is
# one contiguous run of rows. Every split is followed by a dim() check; the
# "##" lines record the dimensions printed when this was run.
motif1 <- Gene_motif[seq_len(3789), ]
dim(motif1)
## [1] 3789 3
motif2 <- Gene_motif[seq(3790, 4806), ]
dim(motif2)
## [1] 1017 3
motif3 <- Gene_motif[seq(4807, 5025), ]
dim(motif3)
## [1] 219 3
motif4 <- as.data.frame(Gene_motif[seq(5026, 5212), ])
dim(motif4)
## [1] 187 3
motif5 <- Gene_motif[seq(5213, 5832), ]
dim(motif5)
## [1] 620 3
motif6 <- Gene_motif[seq(5833, 6979), ]
dim(motif6)
## [1] 1147 3
motif7 <- as.data.frame(Gene_motif[seq(6980, 7665), ])
dim(motif7)
## [1] 686 3
motif8 <- Gene_motif[seq(7666, 8004), ]
dim(motif8)
## [1] 339 3
# Motifs 4 + 7: pool the gene names of both motifs (187 + 686 = 873 genes)
# and write them out as a single-column, tab-separated list with no header,
# no row names and no quoting.
# Coerce every column to character first so none is carried along as a factor.
motif4 <- data.frame(lapply(motif4, as.character), stringsAsFactors = FALSE)
motif7 <- data.frame(lapply(motif7, as.character), stringsAsFactors = FALSE)
# Column 2 is the gene name looked up from Gene_ID_Conversion.
A <- as.data.frame(motif4[, 2])
colnames(A) <- "Gene name"
B <- as.data.frame(motif7[, 2])
colnames(B) <- "Gene name"
# Both pieces share the same column name, so rbind() stacks them directly.
motif47 <- rbind(A, B)
# NOTE(review): genes whose ID had no match upstream have an NA name, which
# write.table() emits as the literal string NA -- confirm this is acceptable
# for whatever consumes the file.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned, which would silently change the output format.
write.table(motif47, "~/Dropbox/Endoderm TC/Cyclic_loess_norm/motif47.txt",
            sep = "\t", quote = FALSE, col.names = FALSE, row.names = FALSE)
# Comparison set of similar size but with the same DE pattern across species:
# motifs 3 + 5 (839 genes, versus 873 genes in motifs 4 + 7).
# All columns are coerced to character before the gene names (column 2) are
# pulled out.
motif3 <- data.frame(lapply(motif3, as.character), stringsAsFactors = FALSE)
motif5 <- data.frame(lapply(motif5, as.character), stringsAsFactors = FALSE)
A <- setNames(as.data.frame(motif3[[2]]), "Gene name")
B <- setNames(as.data.frame(motif5[[2]]), "Gene name")
motif35 <- rbind(A, B)
# Export is currently disabled:
#write.table(motif35, "~/Dropbox/Endoderm TC/Cyclic_loess_norm/motif35.txt", sep="\t", quote = FALSE, col.names = FALSE, row.names = FALSE)
# Single-motif comparison set: motif 2 on its own (1017 genes) rather than a
# combination of motifs. motif2 is replaced by a one-column data frame that
# holds only its gene names (column 2), coerced to character.
motif2 <- as.data.frame(as.character(motif2[[2]]))
names(motif2) <- "Gene name"
# Export is currently disabled:
#write.table(motif2, "~/Dropbox/Endoderm TC/Cyclic_loess_norm/motif2_only.txt", sep="\t", quote = FALSE, col.names = FALSE, row.names = FALSE)
# The remaining motifs are handled one at a time.
# Motif 1: coerce all columns to character, then keep the gene names
# (column 2) in A as a one-column data frame.
motif1 <- data.frame(lapply(motif1, as.character), stringsAsFactors = FALSE)
A <- setNames(as.data.frame(motif1[[2]]), "Gene name")
# Export is currently disabled:
#write.table(A, "~/Dropbox/Endoderm TC/Cyclic_loess_norm/motif1.txt", sep="\t", quote = FALSE, col.names = FALSE, row.names = FALSE)