# Background
# Load the gene-ID conversion table. It is accessed below through `$ensg`
# (IDs to match on) and `$gene` (names to look up).

Gene_ID_Conversion <- read.csv("~/Downloads/Gene_ID_Conversion.csv", stringsAsFactors = FALSE)

# All motifs
# Load the gene-to-motif table. Its `$Gene` values are matched against the
# conversion table below; columns 1 and 2 are carried into Gene_motif.

ENSG_Motif_Number <- read.csv("~/Desktop/Endoderm_TC/ashlar-trial/data/ENSG_Motif_Number_TMM_cyclic_loess.csv", stringsAsFactors = FALSE)

# Get Gene name for ENSG- all Motifs
# For every entry of ENSG_Motif_Number$Gene, find the position of its first
# match in Gene_ID_Conversion$ensg. match() yields NA where there is no match.

list_match <- match(ENSG_Motif_Number$Gene, Gene_ID_Conversion$ensg)

# Pull the gene name at each matched position (an NA position gives an NA
# name), keeping the row order of ENSG_Motif_Number.
get_gene <- as.data.frame(Gene_ID_Conversion$gene[list_match])

# Gene to Motif
# Three-column data frame: column 1 of ENSG_Motif_Number, the looked-up gene
# name, column 2 of ENSG_Motif_Number. Later code indexes these by position.
# NOTE(review): presumably column 1 is the ENSG ID and column 2 the motif
# number -- confirm against the CSV header.

Gene_motif <- cbind(ENSG_Motif_Number[,1], get_gene, ENSG_Motif_Number[,2])

# Separate into motifs ----
# Each motif is a contiguous, hard-coded block of Gene_motif rows.
# NOTE(review): assumes the rows of Gene_motif are ordered by motif so that
# these ranges line up with motif membership -- confirm against the input.

motif1 <- Gene_motif[1:3789, ]
motif2 <- Gene_motif[3790:4806, ]
motif3 <- Gene_motif[4807:5025, ]
motif4 <- Gene_motif[5026:5212, ]
motif5 <- Gene_motif[5213:5832, ]
motif6 <- Gene_motif[5833:6979, ]
motif7 <- Gene_motif[6980:7665, ]
motif8 <- Gene_motif[7666:8004, ]

# Sanity check: rows x columns of every slice (recorded output kept below).
dim(motif1)
## [1] 3789    3
dim(motif2)
## [1] 1017    3
dim(motif3)
## [1] 219   3
dim(motif4)
## [1] 187   3
dim(motif5)
## [1] 620   3
dim(motif6)
## [1] 1147    3
dim(motif7)
## [1] 686   3
dim(motif8)
## [1] 339   3
# Motifs 4 + 7: combined gene-name list ----
# Coerce every column to character so the gene names are plain strings
# before they are extracted.
motif4 <- data.frame(lapply(motif4, as.character), stringsAsFactors = FALSE)
motif7 <- data.frame(lapply(motif7, as.character), stringsAsFactors = FALSE)

# Column 2 is the gene name looked up from the conversion table.
A <- as.data.frame(motif4[, 2])
colnames(A) <- c("Gene name")

B <- as.data.frame(motif7[, 2])
colnames(B) <- c("Gene name")

# Stack the two lists: motif 4 genes first, then motif 7 genes.
motif47 <- rbind(A, B)

# Write one gene name per line, with no header, row names or quoting.
# TRUE/FALSE are spelled out because T and F are ordinary variables that
# can be reassigned.
write.table(
  motif47,
  "~/Dropbox/Endoderm TC/Cyclic_loess_norm/motif47.txt",
  sep = "\t",
  quote = FALSE,
  col.names = FALSE,
  row.names = FALSE
)

# Comparison set: motifs 3 + 5 ----
# A similarly sized group (839 genes, versus 873 genes in motifs 4 + 7) but
# with the same DE pattern across species.

# Make every column character before extracting the gene names.
motif3 <- data.frame(lapply(motif3, as.character), stringsAsFactors = FALSE)
motif5 <- data.frame(lapply(motif5, as.character), stringsAsFactors = FALSE)

# One-column data frames of gene names (column 2), labelled "Gene name".
A <- setNames(as.data.frame(motif3[, 2]), "Gene name")
B <- setNames(as.data.frame(motif5[, 2]), "Gene name")

# Motif 3 genes followed by motif 5 genes.
motif35 <- rbind(A, B)

# Export is currently disabled:
# write.table(motif35, "~/Dropbox/Endoderm TC/Cyclic_loess_norm/motif35.txt",
#             sep = "\t", quote = FALSE, col.names = FALSE, row.names = FALSE)

# Comparison set: a single motif ----
# Motif 2 on its own (1017 genes) rather than a combination of motifs.

# Make every column character, then reduce motif2 to a one-column data frame
# holding the gene names (column 2), labelled "Gene name".
motif2 <- data.frame(lapply(motif2, as.character), stringsAsFactors = FALSE)
motif2 <- setNames(as.data.frame(motif2[, 2]), "Gene name")

# Export is currently disabled:
# write.table(motif2,
#             "~/Dropbox/Endoderm TC/Cyclic_loess_norm/motif2_only.txt",
#             sep = "\t", quote = FALSE, col.names = FALSE, row.names = FALSE)

# Remaining motifs, one at a time ----

# Motif 1
# Make every column character, then take the gene names (column 2) as a
# one-column data frame labelled "Gene name".
motif1 <- data.frame(lapply(motif1, as.character), stringsAsFactors = FALSE)
A <- setNames(as.data.frame(motif1[, 2]), "Gene name")

# Export is currently disabled:
# write.table(A, "~/Dropbox/Endoderm TC/Cyclic_loess_norm/motif1.txt",
#             sep = "\t", quote = FALSE, col.names = FALSE, row.names = FALSE)