# Introduction
#
# The goal of this script is to find the average length, in base pairs (bp),
# of the methylated regions, and to compare it with the lengths of the DMRs
# in the DMR data sets.

# Import the data

# Regions to measure; the columns "starting" and "ending" are used further
# down as keys into cpg_array.
region_array <- read.delim(
  file = "../data/region_array_subset.txt",
  header = TRUE
)

# CpG table. Unlike the other inputs, this one is read with a single space
# as the field separator instead of read.delim's default tab.
cpg_array <- read.delim(
  file = "../data/dfCovnoXposOnly.txt",
  header = TRUE,
  sep = " "
)

# DMR tables used for comparison; each is read with the default settings.
humans_heart_kidney_DMRs <- read.delim(
  file = "../data/humans_heart_kidney_DMRs.txt",
  header = TRUE
)
human_chimp_heart <- read.delim(
  file = "../data/Methylation_sDMRs/HumanChimp_heart_DMRs.txt",
  header = TRUE
)
human_chimp_liver <- read.delim(
  file = "../data/Methylation_sDMRs/HumanChimp_liver_DMRs.txt",
  header = TRUE
)

# Average bp length

# Compute the length of every region as the difference between the
# dfCovnoX value looked up for its "ending" key and the one looked up for
# its "starting" key, then summarise the non-negative lengths.

# Copy the row names of cpg_array into its second column so they can be
# used as a join key (the merges below refer to that column as "V2").
cpg_array[, 2] <- rownames(cpg_array)

# Look up the cpg_array row for the first and for the last entry of each
# region.
starting_pos <- merge(
  x = region_array, y = cpg_array,
  by.x = "starting", by.y = "V2"
)
ending_pos <- merge(
  x = region_array, y = cpg_array,
  by.x = "ending", by.y = "V2"
)

# NOTE(review): the subtraction pairs the two merge results row by row.
# merge() sorts each result by its own key, so this assumes both results
# contain the same regions in the same order -- TODO confirm.
length_bp <- data.frame(ending_pos$dfCovnoX - starting_pos$dfCovnoX)

# Rows with a negative length are discarded.
no_neg <- which(length_bp[, 1] < 0)

# Select the rows to keep with a positive index. Negating `no_neg` would
# select zero rows whenever no negative length exists, because
# -integer(0) is integer(0).
keep_rows <- setdiff(seq_len(nrow(length_bp)), no_neg)
length_bp_new <- length_bp[keep_rows, ]

summary(length_bp_new)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     4.0   207.0   354.0   484.9   593.0 10462.0

# Compare to DMR data

# Span (end - start) of every DMR in each comparison table, summarised one
# table at a time. The "##" lines are the output recorded from a previous
# run.

# Human heart vs. kidney DMRs.
heart_kidney_span <- humans_heart_kidney_DMRs$end -
  humans_heart_kidney_DMRs$start
summary(heart_kidney_span)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     6.0   260.0   485.0   593.3   829.0  4630.0

# Human vs. chimp heart DMRs.
chimp_heart_span <- human_chimp_heart$end - human_chimp_heart$start
summary(chimp_heart_span)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     4.0   171.0   315.0   393.8   532.0  2726.0

# Human vs. chimp liver DMRs.
chimp_liver_span <- human_chimp_liver$end - human_chimp_liver$start
summary(chimp_liver_span)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       4     169     312     398     536    3336

# Average number of orthologous CpGs

# Number of entries covered by each region: the "ending" key minus the
# "starting" key, plus one so that both endpoints are counted.
cpgs_per_region <- region_array$ending - region_array$starting + 1
summary(cpgs_per_region)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   3.000   3.000   4.000   7.295   7.000 289.000

# For comparison, the "n" column of the human vs. chimp heart DMR table.
heart_dmr_n <- human_chimp_heart$n
summary(heart_dmr_n)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   3.000   4.000   7.000   9.699  12.000 134.000