In this script, we evaluate how often mouse phenotype annotations report embryonic lethality for 3 categories of genes: genes with a shared reduction of variation, genes with a reduction of variation in only one species, and non-reduced genes.

source("chunk-options.R")
## Warning: package 'knitr' was built under R version 3.4.4

Import the data

To obtain the data, we took the gene list of each of the 3 categories of genes and ran it through the Mammalian Phenotype database from Jackson Lab: http://www.informatics.jax.org/batch/summary.

# Load data - shared genes.
# read.delim() expects a tab-delimited file with a header row; the columns used
# below are Input, Feature.Type, MP.ID and Term.

Lethality_shared_genes <- read.delim("../data/Lethality_shared_genes.txt")

Data Exploration

How many genes do we start with?

length(unique(Lethality_shared_genes$Input))
[1] 380
summary(Lethality_shared_genes)
     Input                 Input.Type     MGI.Gene.Marker.ID
 FGF8   : 173   current symbol  :4891   MGI:99604  : 172    
 FBN1   : 142   human synonym   : 255   MGI:95489  : 142    
 PRKDC  : 135   old symbol      : 101   MGI:104779 : 135    
 MTOR   : 134   zebrafish symbol:  90   MGI:1928394: 134    
 NODAL  : 110   synonym         :  86   MGI:97359  : 110    
 CTSL2  : 108   human symbol    :  22   MGI:88564  : 108    
 (Other):4663   (Other)         :  20   (Other)    :4664    
     Symbol    
 Fgf8   : 172  
 Fbn1   : 142  
 Prkdc  : 135  
 Mtor   : 134  
 Nodal  : 110  
 Ctsl   : 108  
 (Other):4664  
                                                        Name     
 fibroblast growth factor 8                               : 172  
 fibrillin 1                                              : 142  
 protein kinase, DNA activated, catalytic polypeptide     : 135  
 mechanistic target of rapamycin (serine/threonine kinase): 134  
 nodal                                                    : 110  
 cathepsin L                                              : 108  
 (Other)                                                  :4664  
              Feature.Type         MP.ID     
 protein coding gene:5456             : 195  
 pseudogene         :   8   MP:0002169:  56  
 QTL                :   1   MP:0011098:  36  
                            MP:0001262:  35  
                            MP:0002083:  35  
                            MP:0011100:  33  
                            (Other)   :5075  
                                                            Term     
                                                              : 195  
 no abnormal phenotype detected                               :  56  
 embryonic lethality during organogenesis, complete penetrance:  36  
 decreased body weight                                        :  35  
 premature death                                              :  35  
 preweaning lethality, complete penetrance                    :  33  
 (Other)                                                      :5075  
# Eliminate anything that's not a protein coding gene.
# which() turns the logical comparison into row indices, so rows whose
# Feature.Type is NA are dropped rather than returned as all-NA rows.

protein_lethality <- Lethality_shared_genes[which(Lethality_shared_genes$Feature.Type == "protein coding gene") , ]

# Number of unique input genes left after the protein-coding filter
length(unique(protein_lethality$Input))
[1] 380

How many genes have at least one associated phenotype?

# Row indices whose MP.ID contains "MP"; rows with a blank MP.ID (no phenotype
# annotation) do not match and are left out.
mp_check <- grep("MP", protein_lethality$MP.ID)

# Keep only the annotated rows
complete_mp <- protein_lethality[mp_check, ]

# Number of unique input genes with at least one MP annotation
length(unique(complete_mp$Input))
[1] 218

Which phenotype terms mention “lethality”? We list them all in order to find every variant of “embryonic lethality”.

# Row indices of annotations whose Term contains the substring "lethality"
terms_lethality <- grep("lethality", complete_mp$Term)

# Subset to those annotations
observe_lethality <- complete_mp[terms_lethality, ]

# Distinct lethality-related terms present in this gene set
unique(observe_lethality$Term)
 [1] perinatal lethality, incomplete penetrance                                          
 [2] neonatal lethality, complete penetrance                                             
 [3] prenatal lethality, complete penetrance                                             
 [4] embryonic lethality during organogenesis, complete penetrance                       
 [5] lethality throughout fetal growth and development, incomplete penetrance            
 [6] neonatal lethality, incomplete penetrance                                           
 [7] perinatal lethality, complete penetrance                                            
 [8] postnatal lethality, complete penetrance                                            
 [9] lethality throughout fetal growth and development, complete penetrance              
[10] lethality, incomplete penetrance                                                    
[11] postnatal lethality                                                                 
[12] postnatal lethality, incomplete penetrance                                          
[13] preweaning lethality, complete penetrance                                           
[14] embryonic lethality at implantation, complete penetrance                            
[15] embryonic lethality, complete penetrance                                            
[16] embryonic lethality during organogenesis, incomplete penetrance                     
[17] perinatal lethality                                                                 
[18] preweaning lethality, incomplete penetrance                                         
[19] embryonic lethality between implantation and somite formation, complete penetrance  
[20] embryonic lethality during organogenesis                                            
[21] embryonic lethality before implantation, complete penetrance                        
[22] embryonic lethality prior to tooth bud stage                                        
[23] prenatal lethality                                                                  
[24] prenatal lethality, incomplete penetrance                                           
[25] embryonic lethality before implantation, incomplete penetrance                      
[26] embryonic lethality prior to organogenesis                                          
[27] lethality, complete penetrance                                                      
[28] prenatal lethality prior to heart atrial septation                                  
[29] embryonic lethality between implantation and placentation, complete penetrance      
[30] embryonic lethality between implantation and somite formation                       
[31] embryonic lethality                                                                 
[32] embryonic lethality, incomplete penetrance                                          
[33] embryonic lethality between somite formation and embryo turning, complete penetrance
[34] embryonic lethality between implantation and placentation                           
2440 Levels:  ... Wormian bones

Terms that we want to include: any term containing “embryonic lethality”, “prenatal lethality”, or “lethality throughout fetal growth and development”.

Pick out the terms

# Each grep() returns row indices within observe_lethality; the lengths printed
# below therefore count matching annotation rows, not unique genes.
embryonic_lethality <- grep("embryonic lethality", observe_lethality$Term)
length(embryonic_lethality)
[1] 132
prenatal_lethality <- grep("prenatal lethality", observe_lethality$Term)
length(prenatal_lethality)
[1] 32
fetal_lethality <- grep("lethality throughout fetal growth and development", observe_lethality$Term)
length(fetal_lethality)
[1] 21
# Pull out the rows for each of the three term families
lethality1 <- observe_lethality[embryonic_lethality, ]
lethality2 <- observe_lethality[prenatal_lethality, ]
lethality3 <- observe_lethality[fetal_lethality, ]

# Stack the three subsets into one table
lethal3 <- rbind(lethality1, lethality2, lethality3)

# Count each input gene once, however many matching rows it has
length(unique(lethal3$Input))
[1] 89

Conclusion: Knockdown of 89 of the 218 shared genes (41%) that have at least one documented associated phenotype is embryonic lethal.

# Summarise lethality annotations for one gene set, so that the same analysis
# can be run for every class of genes.
#
# Args:
#   data_name: data frame of phenotype annotations with the columns Input,
#              Feature.Type, MP.ID and Term. A gene may occupy several rows;
#              all counts below are of unique Input values.
#
# Returns:
#   A 1 x 3 integer matrix with columns
#     length_Lethality_shared_genes - unique Input values in data_name
#                                     (the label is kept for compatibility and
#                                     applies to whichever gene set is passed)
#     genes_associated_phenotype    - unique protein-coding Input values with
#                                     at least one MP.ID containing "MP"
#     length_lethal3                - of those, unique Input values with a Term
#                                     containing "embryonic lethality",
#                                     "prenatal lethality" or "lethality
#                                     throughout fetal growth and development"
#
# Raises:
#   An error if data_name is not a data frame or lacks a required column.
#   Without this check a malformed table would silently yield all-zero counts.
analyze_lethality <- function(data_name){
  stopifnot(is.data.frame(data_name))
  required_cols <- c("Input", "Feature.Type", "MP.ID", "Term")
  missing_cols <- setdiff(required_cols, names(data_name))
  if (length(missing_cols) > 0) {
    stop(
      "data_name is missing required column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # Every reported number is a count of distinct input genes
  count_genes <- function(df) length(unique(df$Input))

  # How many genes are originally there
  n_input <- count_genes(data_name)

  # Eliminate anything that's not a protein coding gene; which() drops NA rows
  is_protein_coding <- which(data_name$Feature.Type == "protein coding gene")
  protein_coding <- data_name[is_protein_coding, ]

  # Keep rows that carry a phenotype annotation (MP.ID contains "MP")
  annotated <- protein_coding[grepl("MP", protein_coding$MP.ID), ]
  n_annotated <- count_genes(annotated)

  # Look for lethality: a row qualifies if its Term contains any of the phrases
  lethal_phrases <- c(
    "embryonic lethality",
    "prenatal lethality",
    "lethality throughout fetal growth and development"
  )
  is_lethal <- grepl(paste(lethal_phrases, collapse = "|"), annotated$Term)
  n_lethal <- count_genes(annotated[is_lethal, ])

  # Explicit names keep the column labels callers already see
  cbind(
    length_Lethality_shared_genes = n_input,
    genes_associated_phenotype = n_annotated,
    length_lethal3 = n_lethal
  )
}

# Run on shared genes.
# Sanity check: the function should reproduce the step-by-step counts obtained
# above for this table (380 input genes, 218 with a phenotype, 89 lethal).

analyze_lethality(Lethality_shared_genes)
     length_Lethality_shared_genes genes_associated_phenotype
[1,]                           380                        218
     length_lethal3
[1,]             89
89/218
[1] 0.4082569
# Run on non-reduced genes.
# Note: the first output column is always labelled
# length_Lethality_shared_genes by analyze_lethality(); here it holds the
# number of unique input genes in the non-reduced table.

Lethality_non_red_genes <- read.delim("../data/Lethality_non_red_genes.txt")
analyze_lethality(Lethality_non_red_genes)
     length_Lethality_shared_genes genes_associated_phenotype
[1,]                          7151                       4118
     length_lethal3
[1,]           1139
1139/4118
[1] 0.2765906
# Run on genes reduced in one species.
# Note: the first output column is always labelled
# length_Lethality_shared_genes by analyze_lethality(); here it holds the
# number of unique input genes in the reduced-in-one-species table.

Lethality_red_one_species <- read.delim("../data/Lethality_red_one_species.txt")
analyze_lethality(Lethality_red_one_species)
     length_Lethality_shared_genes genes_associated_phenotype
[1,]                          2330                       1286
     length_lethal3
[1,]            458
458/1286
[1] 0.3561431

Assessing significance of difference in proportions

# Diff between shared and non reduced.
# Two-sample test of equal proportions on counts copied by hand from the
# analyze_lethality() results above: 89 of 218 shared genes versus 1139 of 4118
# non-reduced genes. correct=TRUE requests the continuity correction.
prop.test(c(89,1139),c(218,4118),correct=TRUE)

    2-sample test for equality of proportions with continuity
    correction

data:  c(89, 1139) out of c(218, 4118)
X-squared = 17.038, df = 1, p-value = 3.664e-05
alternative hypothesis: two.sided
95 percent confidence interval:
 0.06259038 0.20074222
sample estimates:
   prop 1    prop 2 
0.4082569 0.2765906 
# Diff between shared and reduced in 1 species.
# Two-sample test of equal proportions on counts copied by hand from the
# analyze_lethality() results above: 89 of 218 shared genes versus 458 of 1286
# genes reduced in one species. correct=TRUE requests the continuity correction.
prop.test(c(89,458),c(218,1286),correct=TRUE)

    2-sample test for equality of proportions with continuity
    correction

data:  c(89, 458) out of c(218, 1286)
X-squared = 1.9681, df = 1, p-value = 0.1606
alternative hypothesis: two.sided
95 percent confidence interval:
 -0.02086789  0.12509549
sample estimates:
   prop 1    prop 2 
0.4082569 0.3561431