# The goal of this script is to find the average base-pair (bp) length of the
# methylated regions.
# Load the input tables ----

# DMR tables (tab-delimited, read.delim defaults). Their `start` and `end`
# columns are summarised further down in this script.
humans_heart_kidney_DMRs <- read.delim(
  file = "../data/humans_heart_kidney_DMRs.txt"
)
human_chimp_heart <- read.delim(
  file = "../data/Methylation_sDMRs/HumanChimp_heart_DMRs.txt"
)
human_chimp_liver <- read.delim(
  file = "../data/Methylation_sDMRs/HumanChimp_liver_DMRs.txt"
)

# Region table: its `starting` / `ending` columns are later matched against
# the row names of `cpg_array`.
region_array <- read.delim(file = "../data/region_array_subset.txt")

# CpG table; unlike the other inputs this file is space-separated.
cpg_array <- read.delim(file = "../data/dfCovnoXposOnly.txt", sep = " ")
# Compute the base-pair length of every region ----
#
# `region_array$starting` / `region_array$ending` are matched against the row
# names of `cpg_array`; the matched rows supply the position (`dfCovnoX`) of
# each region's first and last CpG.

# Expose the CpG row names as a regular column (`V2`) so they can be used as a
# lookup key.
cpg_array[, 2] <- rownames(cpg_array)

# Resolve both boundaries region by region. Two independent merge() calls would
# each be sorted by their own key ("starting" vs. "ending") and could drop
# different rows, so subtracting their columns positionally may pair the start
# of one region with the end of another. match() keeps everything in the row
# order of `region_array`.
start_row <- match(region_array$starting, cpg_array$V2)
end_row <- match(region_array$ending, cpg_array$V2)

# Keep only regions whose two boundaries were both found (inner-join
# semantics, as with merge()).
resolved <- !is.na(start_row) & !is.na(end_row)

# Region columns plus the position of the first / last CpG; row i of
# `starting_pos` and row i of `ending_pos` always describe the same region.
starting_pos <- cbind(
  region_array[resolved, , drop = FALSE],
  dfCovnoX = cpg_array$dfCovnoX[start_row[resolved]]
)
ending_pos <- cbind(
  region_array[resolved, , drop = FALSE],
  dfCovnoX = cpg_array$dfCovnoX[end_row[resolved]]
)

length_bp <- data.frame(ending_pos$dfCovnoX - starting_pos$dfCovnoX)

# Row indices of regions with a negative length; these are discarded.
# NOTE(review): presumably regions whose boundaries lie on different
# chromosomes (positions carry no chromosome information here) -- confirm.
no_neg <- which(length_bp[, 1] < 0)

# Select the rows to keep by positive index: `x[-integer(0), ]` selects
# nothing, so negating an empty `no_neg` would silently discard every region.
length_bp_new <- length_bp[setdiff(seq_len(nrow(length_bp)), no_neg), ]
summary(length_bp_new)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4.0 207.0 354.0 484.9 593.0 10462.0
# Length (end - start) of the human heart vs. kidney DMRs.
heart_kidney_len <- humans_heart_kidney_DMRs$end - humans_heart_kidney_DMRs$start
summary(heart_kidney_len)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 6.0 260.0 485.0 593.3 829.0 4630.0

# Length (end - start) of the human vs. chimp heart DMRs.
chimp_heart_len <- human_chimp_heart$end - human_chimp_heart$start
summary(chimp_heart_len)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4.0 171.0 315.0 393.8 532.0 2726.0

# Length (end - start) of the human vs. chimp liver DMRs.
chimp_liver_len <- human_chimp_liver$end - human_chimp_liver$start
summary(chimp_liver_len)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4 169 312 398 536 3336

# Inclusive span between each region's `starting` and `ending` values
# (hence the + 1).
region_span <- (region_array$ending - region_array$starting) + 1
summary(region_span)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3.000 3.000 4.000 7.295 7.000 289.000

# The `n` column of the human vs. chimp heart DMR table, for comparison.
chimp_heart_n <- human_chimp_heart$n
summary(chimp_heart_n)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3.000 4.000 7.000 9.699 12.000 134.000