Skip to contentSkip to frontmatterSkip to Backmatter

Exploratory Analysis

install.packages('qtl')
install.packages('ggcorrplot')
library(qtl)
library(ggplot2)
library(ggcorrplot)
source("myFunctions.R")
# Read genetic data from the data set
cross1 <- read.cross("csv", file = "Lyons1.csv", 
                     genotypes = c("D", "H", "C"), 
                     na.strings = "-", alleles = c("D", "C"))

# Plot a jittermap of the genetic data - jitters the data so no two markers are on the same spot
jittermap(cross1)

# Summarize the genetic data
summary(cross1)

# Extract the names of phenotypes
names(cross1$pheno)

# Estimate recombination frequencies - Helps us to understand how clean the data is
cross1 <- est.rf(cross1)
 --Read the following data:
	 278  individuals
	 109  markers
	 15  phenotypes
 --Cross type: f2 
This is an object of class "cross". It is too complex to print, so we provide just this summary. F2 intercross No. individuals: 278 No. phenotypes: 15 Percent phenotyped: 100 100 100 100 97.5 97.5 97.5 96.8 97.5 97.5 97.5 97.5 99.6 99.6 98.9 No. chromosomes: 20 Autosomes: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 X chr: X Total markers: 109 No. markers: 6 10 4 7 4 9 5 4 6 7 7 4 6 4 4 5 4 4 4 5 Percent genotyped: 97.4 Genotypes (%): Autosomes: DD:24.3 DC:50.6 CC:25.0 not CC:0.0 not DD:0.0 X chromosome: DY:49.2 CY:50.8 F2 intercross No. individuals: 278 No. phenotypes: 15 Percent phenotyped: 100 100 100 100 97.5 97.5 97.5 96.8 97.5 97.5 97.5 97.5 99.6 99.6 98.9 No. chromosomes: 20 Autosomes: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 X chr: X Total markers: 109 No. markers: 6 10 4 7 4 9 5 4 6 7 7 4 6 4 4 5 4 4 4 5 Percent genotyped: 97.4 Genotypes (%): Autosomes: DD:24.3 DC:50.6 CC:25.0 not CC:0.0 not DD:0.0 X chromosome: DY:49.2 CY:50.8
F2 intercross No. individuals: 278 No. phenotypes: 15 Percent phenotyped: 100 100 100 100 97.5 97.5 97.5 96.8 97.5 97.5 97.5 97.5 99.6 99.6 98.9 No. chromosomes: 20 Autosomes: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 X chr: X Total markers: 109 No. markers: 6 10 4 7 4 9 5 4 6 7 7 4 6 4 4 5 4 4 4 5 Percent genotyped: 97.4 Genotypes (%): Autosomes: DD:24.3 DC:50.6 CC:25.0 not CC:0.0 not DD:0.0 X chromosome: DY:49.2 CY:50.8
Loading...

Plot 1

plotRF(cross1) #Plots the pairwise recombination data to understand how clean the data is.
#Clean if there are no large red spots
Loading...
plot.map(cross1) #Plots the positions of the genetic markers on the chromosomes
Loading...
plotMissing(cross1) #Plots the locations of missing genotypes on the chromosome map, we don't want may missing spots
Loading...
gallscore <- cross1$pheno$gallstone_score
gallcount <- cross1$pheno$gallstone_count
gallweight <- cross1$pheno$gallstone_weight
chmcagg <- cross1$pheno$ChMC_agg
#Looking to see if the distribution of the data is approximately normal. 
par(mfrow = c(2, 2))
qqnorm(gallscore)
qqline(gallscore)

qqnorm(gallcount)
qqline(gallcount)

qqnorm(gallweight)
qqline(gallweight)

qqnorm(chmcagg)
qqline(chmcagg)
par(mfrow = c(1,1))
Loading...
#Setting up the basic settings for the main scan for all the variables. 
#We are using a step size of 2 and the Haldane method to map the data. 
cross1 <- calc.genoprob(cross1, step=2.0, off.end = 0.0, error.prob = 1.0e-4, 
                        map.function = "haldane", stepwidth = "fixed")
cross1 <- sim.geno(cross1, step=2.0, off.end = 0.0, error.prob = 1.0e-4, 
                        map.function = "haldane", stepwidth = "fixed")
#Creating a mainscan analysis for the gallscore data using 100 permutations
cross1.scan.gallscore <- scanone(cross1, pheno.col = gallscore, model = "normal", method = "em")
cross1.scan.gallscore.perm <- scanone(cross1, pheno.col = gallscore, model = "normal", method = "em", n.perm = 100) 
Permutation 5 
Permutation 10 
Permutation 15 
Permutation 20 
Permutation 25 
Permutation 30 
Permutation 35 
Permutation 40 
Permutation 45 
Permutation 50 
Permutation 55 
Permutation 60 
Permutation 65 
Permutation 70 
Permutation 75 
Permutation 80 
Permutation 85 
Permutation 90 
Permutation 95 
Permutation 100 
#Plotting the gallscore main scan data with threshold lines
plot(cross1.scan.gallscore, main = "Main Scan Plot of Gallscore")
thresh <- summary(cross1.scan.gallscore.perm, alpha = c(0.37, 0.10, 0.05))
abline(h=thresh[1], col = "blue")
abline(h=thresh[2], col = "green")
abline(h=thresh[3], col = "red")
Loading...
#Creating a summary of the main scan
summary(cross1.scan.gallscore, perm = cross1.scan.gallscore.perm, alpha = 0.05, pvalues = TRUE)
summary(cross1.scan.gallscore, perm = cross1.scan.gallscore.perm, alpha = 0.10, pvalues = TRUE)
summary(cross1.scan.gallscore, perm = cross1.scan.gallscore.perm, alpha = 0.37, pvalues = TRUE)

score <- find.marker(cross1, 2, 53.8)
Loading...
Loading...
Loading...
#Creating an effect plot for the gallscore mainscan run
effectplot(cross1, pheno.col=gallscore, mname1=score)
Loading...
#Creating a mainscan analysis for the gall count data using 100 permutations
cross1.scan.gallcount <- scanone(cross1, pheno.col = gallcount, model = "normal", method = "hk") #question about pheno.col #

cross1.scan.gallcount.perm <- scanone(cross1, pheno.col = gallcount, model = "normal", method = "hk", n.perm = 100) #question about pheno.col #
Permutation 5 
Permutation 10 
Permutation 15 
Permutation 20 
Permutation 25 
Permutation 30 
Permutation 35 
Permutation 40 
Permutation 45 
Permutation 50 
Permutation 55 
Permutation 60 
Permutation 65 
Permutation 70 
Permutation 75 
Permutation 80 
Permutation 85 
Permutation 90 
Permutation 95 
Permutation 100 
#Plotting the gallscore main scan data with threshold lines
plot(cross1.scan.gallcount, main = "Main Scan Plot of Gall Count")
thresh <- summary(cross1.scan.gallcount.perm, alpha = c(0.37, 0.10, 0.05))
abline(h=thresh[1], col = "blue")
abline(h=thresh[2], col = "green")
abline(h=thresh[3], col = "red")
Loading...
#Creating a summary of the main scan
summary(cross1.scan.gallcount)
Loading...
#Creating a summary of the main scan with 95% confidence
summary(cross1.scan.gallcount, perm = cross1.scan.gallcount.perm, alpha = 0.05, pvalues = T)
Loading...
#Creating an effect plot for the gall count mainscan run
par(mfrow = c(1,2))
count1 <- find.marker(cross1, 6, 7.81)
effectplot(cross1, pheno.col=gallcount, mname1=count1)
count2<- find.marker(cross1, 8, 60.14)
effectplot(cross1, pheno.col=gallcount, mname1=count2)
par(mfrow = c(1,1))
Loading...
cross1.scan.gallweight <- scanone(cross1, pheno.col = gallweight, model = "normal", method = "em") #question about pheno.col #
cross1.scan.gallweight.perm <- scanone(cross1, pheno.col = gallweight, model = "normal", method = "em", n.perm = 100) #question about pheno.col #
Permutation 5 
Permutation 10 
Permutation 15 
Permutation 20 
Permutation 25 
Permutation 30 
Permutation 35 
Permutation 40 
Permutation 45 
Permutation 50 
Permutation 55 
Permutation 60 
Permutation 65 
Permutation 70 
Permutation 75 
Permutation 80 
Permutation 85 
Permutation 90 
Permutation 95 
Permutation 100 
plot(cross1.scan.gallweight, main = "Main Scan Plot of Gall Weight")
thresh <- summary(cross1.scan.gallweight.perm, alpha = c(0.37, 0.10, 0.05))
abline(h=thresh[1], col = "blue")
abline(h=thresh[2], col = "green")
abline(h=thresh[3], col = "red")
Loading...
summary(cross1.scan.gallweight)
summary(cross1.scan.gallweight, perm = cross1.scan.gallweight.perm, alpha = 0.05)
summary(cross1.scan.gallweight, perm = cross1.scan.gallweight.perm, alpha = 0.10)
summary(cross1.scan.gallweight, perm = cross1.scan.gallweight.perm, alpha = 0.37, pvalue = TRUE)
Loading...
Loading...
Loading...
Loading...
weight <- find.marker(cross1, 8, 61.7)
effectplot(cross1, pheno.col=gallweight, mname1=weight)
Loading...
cross1.scan.chmcagg <- scanone(cross1, pheno.col = chmcagg, model = "normal", method = "em") #question about pheno.col #
cross1.scan.chmcagg.perm <- scanone(cross1, pheno.col = chmcagg, model = "normal", method = "em", n.perm = 100) #question about pheno.col #
Permutation 5 
Permutation 10 
Permutation 15 
Permutation 20 
Permutation 25 
Permutation 30 
Permutation 35 
Permutation 40 
Permutation 45 
Permutation 50 
Permutation 55 
Permutation 60 
Permutation 65 
Permutation 70 
Permutation 75 
Permutation 80 
Permutation 85 
Permutation 90 
Permutation 95 
Permutation 100 
plot(cross1.scan.chmcagg, main = "Main Scan Plot of Severity of Cholesterol Monohydrate Crystals, aggregates")
thresh <- summary(cross1.scan.chmcagg.perm, alpha = c(0.37, 0.10, 0.05))
abline(h=thresh[1], col = "blue")
abline(h=thresh[2], col = "green")
abline(h=thresh[3], col = "red")
Loading...
summary(cross1.scan.chmcagg)
summary(cross1.scan.chmcagg, perm = cross1.scan.chmcagg.perm, alpha = 0.05, pvalues = T)
Loading...
Loading...
chol <- find.marker(cross1, 6, 57.8)
effectplot(cross1, pheno.col=chmcagg, mname1=chol)
Loading...
augmented <-  mqmaugment(cross1, minprob=0.1, verbose = TRUE)
INFO: Received a valid cross file type: f2 .
INFO: Number of individuals: 278 .
INFO: Number of chr: 19 .
INFO: Number of markers: 104 .
INFO: VALGRIND MEMORY DEBUG BARRIERE TRIGGERED
INFO: Done with augmentation
# Unique individuals before augmentation:278
# Unique selected individuals:278
# Marker p individual:104
# Individuals after augmentation:1343
INFO: Data augmentation succesfull
INFO: DATA-Augmentation took:  1.281  seconds
par(mfrow=c(1,2))
geno.image(cross1)
geno.image(augmented)
Loading...
summary(cross1)
summary(augmented)

ex <- mqmscan(augmented)
ex1 <- scanone(cross1)

max(ex)
max(ex1)

real_markers <- mqmextractmarkers(ex)
find.marker(augmented, chr=18, pos=5)

markers2 <-mqmextractmarkers(ex1)
find.marker(cross1, chr=18, pos=8.46)


multitoset <- find.markerindex(augmented, "D18Mit64")
setcofactors <- mqmsetcofactors(augmented, cofactors=multitoset)
mqm_co1 <- mqmscan(augmented, setcofactors)
F2 intercross No. individuals: 278 No. phenotypes: 15 Percent phenotyped: 100 100 100 100 97.5 97.5 97.5 96.8 97.5 97.5 97.5 97.5 99.6 99.6 98.9 No. chromosomes: 20 Autosomes: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 X chr: X Total markers: 109 No. markers: 6 10 4 7 4 9 5 4 6 7 7 4 6 4 4 5 4 4 4 5 Percent genotyped: 97.4 Genotypes (%): Autosomes: DD:24.3 DC:50.6 CC:25.0 not CC:0.0 not DD:0.0 X chromosome: DY:49.2 CY:50.8
F2 intercross No. individuals: 1343 No. phenotypes: 15 Percent phenotyped: 100 100 100 100 97.7 97.7 97.7 97.3 97.7 97.7 97.7 97.7 99.9 99.9 98.4 No. chromosomes: 19 Autosomes: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 Total markers: 104 No. markers: 6 10 4 7 4 9 5 4 6 7 7 4 6 4 4 5 4 4 4 Percent genotyped: 100 Genotypes (%): DD:24.1 DC:51.5 CC:24.5 not CC:0.0 not DD:0.0
Loading...
Loading...
Loading...
Loading...
heatmap(cor(cross1$pheno[,4:15], use="complete.obs"))
Loading...
par(mfrow = c(1,1))
plot(ex, col = "red")
plot(ex1, add=TRUE, col = "green")
plot(mqm_co1, add= TRUE, col = "blue")
Loading...
data <- read.csv("LyonsNoQTL.csv", header = TRUE)
summary(data)
data[] <- lapply(data, as.numeric)
summary(data)
data <- na.omit(data)

corr <- round(cor(data), 1)
corrchart <- 
  ggcorrplot(corr, hc.order = TRUE,
             type = "lower",
             lab = TRUE,
             lab_size = 3,
             method="circle",
             colors = c("tomato2", "white", "springgreen3"),
             title="Correlogram of Factors for Gallstone Suscebtibility",
             ggtheme=theme_bw)
corrchart
pgm liver_wt gallstone_presence gallstone_score Min. :0.0000 Min. :0.9225 Length:278 Length:278 1st Qu.:0.0000 1st Qu.:1.3244 Class :character Class :character Median :1.0000 Median :1.5524 Mode :character Mode :character Mean :0.7266 Mean :1.6115 3rd Qu.:1.0000 3rd Qu.:1.8027 Max. :1.0000 Max. :3.2405 gallstone_count gallstone_weight gallstone_sandy gallstone_solid Length:278 Length:278 Length:278 Length:278 Class :character Class :character Class :character Class :character Mode :character Mode :character Mode :character Mode :character ChMC_agg ChMC_ind chol HDL_log Length:278 Length:278 Length:278 Length:278 Class :character Class :character Class :character Class :character Mode :character Mode :character Mode :character Mode :character nonHDL Length:278 Class :character Mode :character
pgm liver_wt gallstone_presence gallstone_score Min. :0.0000 Min. :0.9225 Min. :0.0000 Min. :0.000 1st Qu.:0.0000 1st Qu.:1.3244 1st Qu.:0.0000 1st Qu.:0.000 Median :1.0000 Median :1.5524 Median :1.0000 Median :2.000 Mean :0.7266 Mean :1.6115 Mean :0.6015 Mean :1.288 3rd Qu.:1.0000 3rd Qu.:1.8027 3rd Qu.:1.0000 3rd Qu.:2.000 Max. :1.0000 Max. :3.2405 Max. :1.0000 Max. :2.000 NA's :7 NA's :7 gallstone_count gallstone_weight gallstone_sandy gallstone_solid Min. : 0.000 Min. :0.0000 Min. :0.000 Min. :0.0000 1st Qu.: 0.000 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000 Median : 0.000 Median :0.0000 Median :1.000 Median :0.0000 Mean : 1.129 Mean :0.1508 Mean :1.443 Mean :0.6494 3rd Qu.: 0.000 3rd Qu.:0.0000 3rd Qu.:3.000 3rd Qu.:0.0000 Max. :28.000 Max. :3.1600 Max. :4.000 Max. :4.0000 NA's :7 NA's :9 NA's :7 NA's :7 ChMC_agg ChMC_ind chol HDL_log Min. :0.000 Min. :0.0000 Min. : 67.0 Min. :1.079 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:185.0 1st Qu.:1.806 Median :1.000 Median :0.0000 Median :223.0 Median :1.892 Mean :1.601 Mean :0.7232 Mean :243.9 Mean :1.901 3rd Qu.:3.000 3rd Qu.:1.0000 3rd Qu.:285.0 3rd Qu.:1.991 Max. :4.000 Max. :4.0000 Max. :668.0 Max. :2.455 NA's :7 NA's :7 NA's :1 NA's :1 nonHDL Min. : 8.0 1st Qu.:102.0 Median :137.0 Mean :158.0 3rd Qu.:194.5 Max. :631.0 NA's :3
Loading...