In this script, we evaluate embryonic lethality for 3 categories of genes: genes with a shared reduction of variation, genes with a reduction of variation in only one species, and non-reduced genes.
source("chunk-options.R")
## Warning: package 'knitr' was built under R version 3.4.4
To obtain the data, we took the gene list for each of the 3 categories and ran it through the Mammalian Phenotype database from The Jackson Laboratory: http://www.informatics.jax.org/batch/summary.
# Load data- shared data
Lethality_shared_genes <- read.delim("../data/Lethality_shared_genes.txt")
length(unique(Lethality_shared_genes$Input))
[1] 380
summary(Lethality_shared_genes)
Input Input.Type MGI.Gene.Marker.ID
FGF8 : 173 current symbol :4891 MGI:99604 : 172
FBN1 : 142 human synonym : 255 MGI:95489 : 142
PRKDC : 135 old symbol : 101 MGI:104779 : 135
MTOR : 134 zebrafish symbol: 90 MGI:1928394: 134
NODAL : 110 synonym : 86 MGI:97359 : 110
CTSL2 : 108 human symbol : 22 MGI:88564 : 108
(Other):4663 (Other) : 20 (Other) :4664
Symbol
Fgf8 : 172
Fbn1 : 142
Prkdc : 135
Mtor : 134
Nodal : 110
Ctsl : 108
(Other):4664
Name
fibroblast growth factor 8 : 172
fibrillin 1 : 142
protein kinase, DNA activated, catalytic polypeptide : 135
mechanistic target of rapamycin (serine/threonine kinase): 134
nodal : 110
cathepsin L : 108
(Other) :4664
Feature.Type MP.ID
protein coding gene:5456 : 195
pseudogene : 8 MP:0002169: 56
QTL : 1 MP:0011098: 36
MP:0001262: 35
MP:0002083: 35
MP:0011100: 33
(Other) :5075
Term
: 195
no abnormal phenotype detected : 56
embryonic lethality during organogenesis, complete penetrance: 36
decreased body weight : 35
premature death : 35
preweaning lethality, complete penetrance : 33
(Other) :5075
# Eliminate anything that's not a protein coding gene
protein_lethality <- Lethality_shared_genes[which(Lethality_shared_genes$Feature.Type == "protein coding gene") , ]
length(unique(protein_lethality$Input))
[1] 380
# Keep only rows whose MP.ID contains "MP"; rows with a blank MP.ID
# (no phenotype annotation returned for that gene) are dropped.
mp_check <- grep("MP", protein_lethality$MP.ID)
complete_mp <- protein_lethality[mp_check, ]
# Number of distinct input genes with at least one annotated phenotype
length(unique(complete_mp$Input))
[1] 218
# Restrict to phenotype rows whose Term mentions "lethality", then list the
# distinct terms so we can decide which ones count as embryonic lethality.
terms_lethality <- grep("lethality", complete_mp$Term)
observe_lethality <- complete_mp[terms_lethality, ]
unique(observe_lethality$Term)
[1] perinatal lethality, incomplete penetrance
[2] neonatal lethality, complete penetrance
[3] prenatal lethality, complete penetrance
[4] embryonic lethality during organogenesis, complete penetrance
[5] lethality throughout fetal growth and development, incomplete penetrance
[6] neonatal lethality, incomplete penetrance
[7] perinatal lethality, complete penetrance
[8] postnatal lethality, complete penetrance
[9] lethality throughout fetal growth and development, complete penetrance
[10] lethality, incomplete penetrance
[11] postnatal lethality
[12] postnatal lethality, incomplete penetrance
[13] preweaning lethality, complete penetrance
[14] embryonic lethality at implantation, complete penetrance
[15] embryonic lethality, complete penetrance
[16] embryonic lethality during organogenesis, incomplete penetrance
[17] perinatal lethality
[18] preweaning lethality, incomplete penetrance
[19] embryonic lethality between implantation and somite formation, complete penetrance
[20] embryonic lethality during organogenesis
[21] embryonic lethality before implantation, complete penetrance
[22] embryonic lethality prior to tooth bud stage
[23] prenatal lethality
[24] prenatal lethality, incomplete penetrance
[25] embryonic lethality before implantation, incomplete penetrance
[26] embryonic lethality prior to organogenesis
[27] lethality, complete penetrance
[28] prenatal lethality prior to heart atrial septation
[29] embryonic lethality between implantation and placentation, complete penetrance
[30] embryonic lethality between implantation and somite formation
[31] embryonic lethality
[32] embryonic lethality, incomplete penetrance
[33] embryonic lethality between somite formation and embryo turning, complete penetrance
[34] embryonic lethality between implantation and placentation
2440 Levels: ... Wormian bones
Terms that we want to include: “embryonic lethality”, “prenatal lethality”, “lethality throughout fetal growth and development”
embryonic_lethality <- grep("embryonic lethality", observe_lethality$Term)
length(embryonic_lethality)
[1] 132
prenatal_lethality <- grep("prenatal lethality", observe_lethality$Term)
length(prenatal_lethality)
[1] 32
fetal_lethality <- grep("lethality throughout fetal growth and development", observe_lethality$Term)
length(fetal_lethality)
[1] 21
# Pool the rows matching any of the three lethality term families. A gene can
# match more than one family (or have several matching rows), so the pooled
# table may contain repeats; count distinct Input values rather than rows.
lethality1 <- observe_lethality[embryonic_lethality, ]
lethality2 <- observe_lethality[prenatal_lethality, ]
lethality3 <- observe_lethality[fetal_lethality, ]
lethal3 <- rbind(lethality1, lethality2, lethality3)
length(unique(lethal3$Input))
[1] 89
Conclusion: Knockdown of 89 of the 218 shared genes with at least one documented associated phenotype is embryonic lethal (89/218 ≈ 41%).
# Write a function to run the code above so that we can run it for other classes of genes
# analyze_lethality: count genes with lethality annotations in one gene class.
#
# Args:
#   data_name: data frame of phenotype annotations containing the columns
#     Input, Feature.Type, MP.ID and Term. A gene (Input) may occupy many
#     rows, so every count below is over distinct Input values.
#
# Returns:
#   A 1 x 3 integer matrix with columns
#     length_Lethality_shared_genes - distinct Input values in the whole table
#     genes_associated_phenotype    - distinct Input values among rows that are
#                                     "protein coding gene" and whose MP.ID
#                                     contains "MP"
#     length_lethal3                - distinct Input values among those rows
#                                     whose Term matches "embryonic lethality",
#                                     "prenatal lethality" or "lethality
#                                     throughout fetal growth and development"
#
# Errors:
#   Stops if data_name is not a data frame or lacks a required column. Without
#   this check a missing/misnamed column silently produced counts of zero.
analyze_lethality <- function(data_name) {
  if (!is.data.frame(data_name)) {
    stop("analyze_lethality: `data_name` must be a data frame", call. = FALSE)
  }
  required_cols <- c("Input", "Feature.Type", "MP.ID", "Term")
  missing_cols <- setdiff(required_cols, names(data_name))
  if (length(missing_cols) > 0) {
    stop(
      "analyze_lethality: missing required column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # How many genes are originally there
  n_input_genes <- length(unique(data_name$Input))

  # Eliminate anything that's not a protein coding gene.
  # %in% never yields NA, so rows with a missing Feature.Type are dropped.
  is_protein_coding <- data_name$Feature.Type %in% "protein coding gene"
  protein_coding <- data_name[is_protein_coding, , drop = FALSE]

  # How many genes have an associated phenotype?
  has_phenotype <- grepl("MP", protein_coding$MP.ID, fixed = TRUE)
  with_phenotype <- protein_coding[has_phenotype, , drop = FALSE]
  n_phenotype_genes <- length(unique(with_phenotype$Input))

  # Look for lethality: a single alternation covers the three term families,
  # so rows matching several families are not duplicated before counting.
  lethal_pattern <- paste(
    c(
      "embryonic lethality",
      "prenatal lethality",
      "lethality throughout fetal growth and development"
    ),
    collapse = "|"
  )
  is_lethal <- grepl(lethal_pattern, with_phenotype$Term)
  n_lethal_genes <- length(unique(with_phenotype$Input[is_lethal]))

  # Column names are kept as in the original output so downstream readers of
  # the printed matrix see the same labels.
  cbind(
    length_Lethality_shared_genes = n_input_genes,
    genes_associated_phenotype = n_phenotype_genes,
    length_lethal3 = n_lethal_genes
  )
}
# Run on shared genes
analyze_lethality(Lethality_shared_genes)
length_Lethality_shared_genes genes_associated_phenotype
[1,] 380 218
length_lethal3
[1,] 89
89/218
[1] 0.4082569
# Run on non-reduced genes
Lethality_non_red_genes <- read.delim("../data/Lethality_non_red_genes.txt")
analyze_lethality(Lethality_non_red_genes)
length_Lethality_shared_genes genes_associated_phenotype
[1,] 7151 4118
length_lethal3
[1,] 1139
1139/4118
[1] 0.2765906
# Run on genes reduced in one species
Lethality_red_one_species <- read.delim("../data/Lethality_red_one_species.txt")
analyze_lethality(Lethality_red_one_species)
length_Lethality_shared_genes genes_associated_phenotype
[1,] 2330 1286
length_lethal3
[1,] 458
458/1286
[1] 0.3561431
# Two-sample test for equality of proportions (with continuity correction):
# lethal genes among genes with a phenotype, shared (89/218) vs.
# non-reduced (1139/4118).
prop.test(c(89,1139),c(218,4118),correct=TRUE)
2-sample test for equality of proportions with continuity
correction
data: c(89, 1139) out of c(218, 4118)
X-squared = 17.038, df = 1, p-value = 3.664e-05
alternative hypothesis: two.sided
95 percent confidence interval:
0.06259038 0.20074222
sample estimates:
prop 1 prop 2
0.4082569 0.2765906
# Two-sample test for equality of proportions (with continuity correction):
# lethal genes among genes with a phenotype, shared (89/218) vs.
# reduced in one species (458/1286).
prop.test(c(89,458),c(218,1286),correct=TRUE)
2-sample test for equality of proportions with continuity
correction
data: c(89, 458) out of c(218, 1286)
X-squared = 1.9681, df = 1, p-value = 0.1606
alternative hypothesis: two.sided
95 percent confidence interval:
-0.02086789 0.12509549
sample estimates:
prop 1 prop 2
0.4082569 0.3561431