An Analysis on Genomic Correlation for Gallstone Susceptibility

Sanjita Srinath

doi:10.62329/NMYG6782

Original Research Paper

Exploratory Analysis

Contents

install.packages('qtl')

install.packages('ggcorrplot')

library(qtl)
library(ggplot2)
library(ggcorrplot)
source("myFunctions.R")

# Read genetic data from the data set
cross1 <- read.cross("csv", file = "Lyons1.csv", 
                     genotypes = c("D", "H", "C"), 
                     na.strings = "-", alleles = c("D", "C"))

# Plot a jittermap of the genetic data - jitters the data so no two markers are on the same spot
jittermap(cross1)

# Summarize the genetic data
summary(cross1)

# Extract the names of phenotypes
names(cross1$pheno)

# Estimate recombination frequencies - Helps us to understand how clean the data is
cross1 <- est.rf(cross1)

 --Read the following data:
	 278  individuals
	 109  markers
	 15  phenotypes
 --Cross type: f2

  This is an object of class "cross".
  It is too complex to print, so we provide just this summary.
    F2 intercross

    No. individuals:    278 

    No. phenotypes:     15 
    Percent phenotyped: 100 100 100 100 97.5 97.5 97.5 96.8 97.5 97.5 97.5 97.5 
                        99.6 99.6 98.9 

    No. chromosomes:    20 
        Autosomes:      1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 
        X chr:          X 

    Total markers:      109 
    No. markers:        6 10 4 7 4 9 5 4 6 7 7 4 6 4 4 5 4 4 4 5 
    Percent genotyped:  97.4 
    Genotypes (%):    
          Autosomes:        DD:24.3      DC:50.6      CC:25.0  not CC:0.0  
                        not DD:0.0  
       X chromosome:        DY:49.2      CY:50.8 
    F2 intercross

    No. individuals:    278 

    No. phenotypes:     15 
    Percent phenotyped: 100 100 100 100 97.5 97.5 97.5 96.8 97.5 97.5 97.5 97.5 
                        99.6 99.6 98.9 

    No. chromosomes:    20 
        Autosomes:      1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 
        X chr:          X 

    Total markers:      109 
    No. markers:        6 10 4 7 4 9 5 4 6 7 7 4 6 4 4 5 4 4 4 5 
    Percent genotyped:  97.4 
    Genotypes (%):    
          Autosomes:        DD:24.3      DC:50.6      CC:25.0  not CC:0.0  
                        not DD:0.0  
       X chromosome:        DY:49.2      CY:50.8

    F2 intercross

    No. individuals:    278 

    No. phenotypes:     15 
    Percent phenotyped: 100 100 100 100 97.5 97.5 97.5 96.8 97.5 97.5 97.5 97.5 
                        99.6 99.6 98.9 

    No. chromosomes:    20 
        Autosomes:      1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 
        X chr:          X 

    Total markers:      109 
    No. markers:        6 10 4 7 4 9 5 4 6 7 7 4 6 4 4 5 4 4 4 5 
    Percent genotyped:  97.4 
    Genotypes (%):    
          Autosomes:        DD:24.3      DC:50.6      CC:25.0  not CC:0.0  
                        not DD:0.0  
       X chromosome:        DY:49.2      CY:50.8

 [1] "mouse_id"           "sex"                "pgm"               
 [4] "liver_wt"           "gallstone_presence" "gallstone_score"   
 [7] "gallstone_count"    "gallstone_weight"   "gallstone_sandy"   
[10] "gallstone_solid"    "ChMC_agg"           "ChMC_ind"          
[13] "chol"               "HDL_log"            "nonHDL"

Plot 1¶

plotRF(cross1) #Plots the pairwise recombination data to understand how clean the data is.
#Clean if there are no large red spots

plot.map(cross1) #Plots the positions of the genetic markers on the chromosomes

plotMissing(cross1) #Plots the locations of missing genotypes on the chromosome map, we don't want may missing spots

gallscore <- cross1$pheno$gallstone_score
gallcount <- cross1$pheno$gallstone_count
gallweight <- cross1$pheno$gallstone_weight
chmcagg <- cross1$pheno$ChMC_agg

#Looking to see if the distribution of the data is approximately normal. 
par(mfrow = c(2, 2))
qqnorm(gallscore)
qqline(gallscore)

qqnorm(gallcount)
qqline(gallcount)

qqnorm(gallweight)
qqline(gallweight)

qqnorm(chmcagg)
qqline(chmcagg)
par(mfrow = c(1,1))

#Setting up the basic settings for the main scan for all the variables. 
#We are using a step size of 2 and the Haldane method to map the data. 
cross1 <- calc.genoprob(cross1, step=2.0, off.end = 0.0, error.prob = 1.0e-4, 
                        map.function = "haldane", stepwidth = "fixed")
cross1 <- sim.geno(cross1, step=2.0, off.end = 0.0, error.prob = 1.0e-4, 
                        map.function = "haldane", stepwidth = "fixed")

#Creating a mainscan analysis for the gallscore data using 100 permutations
cross1.scan.gallscore <- scanone(cross1, pheno.col = gallscore, model = "normal", method = "em")
cross1.scan.gallscore.perm <- scanone(cross1, pheno.col = gallscore, model = "normal", method = "em", n.perm = 100)

Permutation 5 
Permutation 10 
Permutation 15 
Permutation 20 
Permutation 25 
Permutation 30 
Permutation 35 
Permutation 40 
Permutation 45 
Permutation 50 
Permutation 55 
Permutation 60 
Permutation 65 
Permutation 70 
Permutation 75 
Permutation 80 
Permutation 85 
Permutation 90 
Permutation 95 
Permutation 100

#Plotting the gallscore main scan data with threshold lines
plot(cross1.scan.gallscore, main = "Main Scan Plot of Gallscore")
thresh <- summary(cross1.scan.gallscore.perm, alpha = c(0.37, 0.10, 0.05))
abline(h=thresh[1], col = "blue")
abline(h=thresh[2], col = "green")
abline(h=thresh[3], col = "red")

#Creating a summary of the main scan
summary(cross1.scan.gallscore, perm = cross1.scan.gallscore.perm, alpha = 0.05, pvalues = TRUE)
summary(cross1.scan.gallscore, perm = cross1.scan.gallscore.perm, alpha = 0.10, pvalues = TRUE)
summary(cross1.scan.gallscore, perm = cross1.scan.gallscore.perm, alpha = 0.37, pvalues = TRUE)

score <- find.marker(cross1, 2, 53.8)

#Creating an effect plot for the gallscore mainscan run
effectplot(cross1, pheno.col=gallscore, mname1=score)

#Creating a mainscan analysis for the gall count data using 100 permutations
cross1.scan.gallcount <- scanone(cross1, pheno.col = gallcount, model = "normal", method = "hk") #question about pheno.col #

cross1.scan.gallcount.perm <- scanone(cross1, pheno.col = gallcount, model = "normal", method = "hk", n.perm = 100) #question about pheno.col #

Permutation 5 
Permutation 10 
Permutation 15 
Permutation 20 
Permutation 25 
Permutation 30 
Permutation 35 
Permutation 40 
Permutation 45 
Permutation 50 
Permutation 55 
Permutation 60 
Permutation 65 
Permutation 70 
Permutation 75 
Permutation 80 
Permutation 85 
Permutation 90 
Permutation 95 
Permutation 100

#Plotting the gallscore main scan data with threshold lines
plot(cross1.scan.gallcount, main = "Main Scan Plot of Gall Count")
thresh <- summary(cross1.scan.gallcount.perm, alpha = c(0.37, 0.10, 0.05))
abline(h=thresh[1], col = "blue")
abline(h=thresh[2], col = "green")
abline(h=thresh[3], col = "red")

#Creating a summary of the main scan
summary(cross1.scan.gallcount)

#Creating a summary of the main scan with 95% confidence
summary(cross1.scan.gallcount, perm = cross1.scan.gallcount.perm, alpha = 0.05, pvalues = T)

#Creating an effect plot for the gall count mainscan run
par(mfrow = c(1,2))
count1 <- find.marker(cross1, 6, 7.81)
effectplot(cross1, pheno.col=gallcount, mname1=count1)
count2<- find.marker(cross1, 8, 60.14)
effectplot(cross1, pheno.col=gallcount, mname1=count2)
par(mfrow = c(1,1))

cross1.scan.gallweight <- scanone(cross1, pheno.col = gallweight, model = "normal", method = "em") #question about pheno.col #
cross1.scan.gallweight.perm <- scanone(cross1, pheno.col = gallweight, model = "normal", method = "em", n.perm = 100) #question about pheno.col #

Permutation 5 
Permutation 10 
Permutation 15 
Permutation 20 
Permutation 25 
Permutation 30 
Permutation 35 
Permutation 40 
Permutation 45 
Permutation 50 
Permutation 55 
Permutation 60 
Permutation 65 
Permutation 70 
Permutation 75 
Permutation 80 
Permutation 85 
Permutation 90 
Permutation 95 
Permutation 100

plot(cross1.scan.gallweight, main = "Main Scan Plot of Gall Weight")
thresh <- summary(cross1.scan.gallweight.perm, alpha = c(0.37, 0.10, 0.05))
abline(h=thresh[1], col = "blue")
abline(h=thresh[2], col = "green")
abline(h=thresh[3], col = "red")

summary(cross1.scan.gallweight)
summary(cross1.scan.gallweight, perm = cross1.scan.gallweight.perm, alpha = 0.05)
summary(cross1.scan.gallweight, perm = cross1.scan.gallweight.perm, alpha = 0.10)
summary(cross1.scan.gallweight, perm = cross1.scan.gallweight.perm, alpha = 0.37, pvalue = TRUE)

weight <- find.marker(cross1, 8, 61.7)
effectplot(cross1, pheno.col=gallweight, mname1=weight)

cross1.scan.chmcagg <- scanone(cross1, pheno.col = chmcagg, model = "normal", method = "em") #question about pheno.col #
cross1.scan.chmcagg.perm <- scanone(cross1, pheno.col = chmcagg, model = "normal", method = "em", n.perm = 100) #question about pheno.col #

Permutation 5 
Permutation 10 
Permutation 15 
Permutation 20 
Permutation 25 
Permutation 30 
Permutation 35 
Permutation 40 
Permutation 45 
Permutation 50 
Permutation 55 
Permutation 60 
Permutation 65 
Permutation 70 
Permutation 75 
Permutation 80 
Permutation 85 
Permutation 90 
Permutation 95 
Permutation 100

plot(cross1.scan.chmcagg, main = "Main Scan Plot of Severity of Cholesterol Monohydrate Crystals, aggregates")
thresh <- summary(cross1.scan.chmcagg.perm, alpha = c(0.37, 0.10, 0.05))
abline(h=thresh[1], col = "blue")
abline(h=thresh[2], col = "green")
abline(h=thresh[3], col = "red")

summary(cross1.scan.chmcagg)
summary(cross1.scan.chmcagg, perm = cross1.scan.chmcagg.perm, alpha = 0.05, pvalues = T)

chol <- find.marker(cross1, 6, 57.8)
effectplot(cross1, pheno.col=chmcagg, mname1=chol)

augmented <-  mqmaugment(cross1, minprob=0.1, verbose = TRUE)

INFO: Received a valid cross file type: f2 .

INFO: Number of individuals: 278 .
INFO: Number of chr: 19 .
INFO: Number of markers: 104 .
INFO: VALGRIND MEMORY DEBUG BARRIERE TRIGGERED
INFO: Done with augmentation
# Unique individuals before augmentation:278
# Unique selected individuals:278
# Marker p individual:104
# Individuals after augmentation:1343
INFO: Data augmentation succesfull
INFO: DATA-Augmentation took:  1.281  seconds

par(mfrow=c(1,2))
geno.image(cross1)
geno.image(augmented)

summary(cross1)
summary(augmented)

ex <- mqmscan(augmented)
ex1 <- scanone(cross1)

max(ex)
max(ex1)

real_markers <- mqmextractmarkers(ex)
find.marker(augmented, chr=18, pos=5)

markers2 <-mqmextractmarkers(ex1)
find.marker(cross1, chr=18, pos=8.46)


multitoset <- find.markerindex(augmented, "D18Mit64")
setcofactors <- mqmsetcofactors(augmented, cofactors=multitoset)
mqm_co1 <- mqmscan(augmented, setcofactors)

heatmap(cor(cross1$pheno[,4:15], use="complete.obs"))

par(mfrow = c(1,1))
plot(ex, col = "red")
plot(ex1, add=TRUE, col = "green")
plot(mqm_co1, add= TRUE, col = "blue")

data <- read.csv("LyonsNoQTL.csv", header = TRUE)
summary(data)
data[] <- lapply(data, as.numeric)
summary(data)
data <- na.omit(data)

corr <- round(cor(data), 1)
corrchart <- 
  ggcorrplot(corr, hc.order = TRUE,
             type = "lower",
             lab = TRUE,
             lab_size = 3,
             method="circle",
             colors = c("tomato2", "white", "springgreen3"),
             title="Correlogram of Factors for Gallstone Suscebtibility",
             ggtheme=theme_bw)
corrchart

      pgm            liver_wt      gallstone_presence gallstone_score   
 Min.   :0.0000   Min.   :0.9225   Length:278         Length:278        
 1st Qu.:0.0000   1st Qu.:1.3244   Class :character   Class :character  
 Median :1.0000   Median :1.5524   Mode  :character   Mode  :character  
 Mean   :0.7266   Mean   :1.6115                                        
 3rd Qu.:1.0000   3rd Qu.:1.8027                                        
 Max.   :1.0000   Max.   :3.2405                                        
 gallstone_count    gallstone_weight   gallstone_sandy    gallstone_solid   
 Length:278         Length:278         Length:278         Length:278        
 Class :character   Class :character   Class :character   Class :character  
 Mode  :character   Mode  :character   Mode  :character   Mode  :character  
                                                                            
                                                                            
                                                                            
   ChMC_agg           ChMC_ind             chol             HDL_log         
 Length:278         Length:278         Length:278         Length:278        
 Class :character   Class :character   Class :character   Class :character  
 Mode  :character   Mode  :character   Mode  :character   Mode  :character  
                                                                            
                                                                            
                                                                            
    nonHDL         
 Length:278        
 Class :character  
 Mode  :character

      pgm            liver_wt      gallstone_presence gallstone_score
 Min.   :0.0000   Min.   :0.9225   Min.   :0.0000     Min.   :0.000  
 1st Qu.:0.0000   1st Qu.:1.3244   1st Qu.:0.0000     1st Qu.:0.000  
 Median :1.0000   Median :1.5524   Median :1.0000     Median :2.000  
 Mean   :0.7266   Mean   :1.6115   Mean   :0.6015     Mean   :1.288  
 3rd Qu.:1.0000   3rd Qu.:1.8027   3rd Qu.:1.0000     3rd Qu.:2.000  
 Max.   :1.0000   Max.   :3.2405   Max.   :1.0000     Max.   :2.000  
                                   NA's   :7          NA's   :7      
 gallstone_count  gallstone_weight gallstone_sandy gallstone_solid 
 Min.   : 0.000   Min.   :0.0000   Min.   :0.000   Min.   :0.0000  
 1st Qu.: 0.000   1st Qu.:0.0000   1st Qu.:0.000   1st Qu.:0.0000  
 Median : 0.000   Median :0.0000   Median :1.000   Median :0.0000  
 Mean   : 1.129   Mean   :0.1508   Mean   :1.443   Mean   :0.6494  
 3rd Qu.: 0.000   3rd Qu.:0.0000   3rd Qu.:3.000   3rd Qu.:0.0000  
 Max.   :28.000   Max.   :3.1600   Max.   :4.000   Max.   :4.0000  
 NA's   :7        NA's   :9        NA's   :7       NA's   :7       
    ChMC_agg        ChMC_ind           chol          HDL_log     
 Min.   :0.000   Min.   :0.0000   Min.   : 67.0   Min.   :1.079  
 1st Qu.:0.000   1st Qu.:0.0000   1st Qu.:185.0   1st Qu.:1.806  
 Median :1.000   Median :0.0000   Median :223.0   Median :1.892  
 Mean   :1.601   Mean   :0.7232   Mean   :243.9   Mean   :1.901  
 3rd Qu.:3.000   3rd Qu.:1.0000   3rd Qu.:285.0   3rd Qu.:1.991  
 Max.   :4.000   Max.   :4.0000   Max.   :668.0   Max.   :2.455  
 NA's   :7       NA's   :7        NA's   :1       NA's   :1      
     nonHDL     
 Min.   :  8.0  
 1st Qu.:102.0  
 Median :137.0  
 Mean   :158.0  
 3rd Qu.:194.5  
 Max.   :631.0  
 NA's   :3

An Analysis on Genomic Correlation for Gallstone Susceptibility