Contents
Exploratory Analysis
install.packages('qtl')
install.packages('ggcorrplot')
library(qtl)
library(ggplot2)
library(ggcorrplot)
source("myFunctions.R")
# Read genetic data from the data set
cross1 <- read.cross("csv", file = "Lyons1.csv",
genotypes = c("D", "H", "C"),
na.strings = "-", alleles = c("D", "C"))
# Plot a jittermap of the genetic data - jitters the data so no two markers are on the same spot
jittermap(cross1)
# Summarize the genetic data
summary(cross1)
# Extract the names of phenotypes
names(cross1$pheno)
# Estimate recombination frequencies - Helps us to understand how clean the data is
cross1 <- est.rf(cross1)
--Read the following data:
278 individuals
109 markers
15 phenotypes
--Cross type: f2
This is an object of class "cross".
It is too complex to print, so we provide just this summary.
F2 intercross
No. individuals: 278
No. phenotypes: 15
Percent phenotyped: 100 100 100 100 97.5 97.5 97.5 96.8 97.5 97.5 97.5 97.5
99.6 99.6 98.9
No. chromosomes: 20
Autosomes: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
X chr: X
Total markers: 109
No. markers: 6 10 4 7 4 9 5 4 6 7 7 4 6 4 4 5 4 4 4 5
Percent genotyped: 97.4
Genotypes (%):
Autosomes: DD:24.3 DC:50.6 CC:25.0 not CC:0.0
not DD:0.0
X chromosome: DY:49.2 CY:50.8
F2 intercross
No. individuals: 278
No. phenotypes: 15
Percent phenotyped: 100 100 100 100 97.5 97.5 97.5 96.8 97.5 97.5 97.5 97.5
99.6 99.6 98.9
No. chromosomes: 20
Autosomes: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
X chr: X
Total markers: 109
No. markers: 6 10 4 7 4 9 5 4 6 7 7 4 6 4 4 5 4 4 4 5
Percent genotyped: 97.4
Genotypes (%):
Autosomes: DD:24.3 DC:50.6 CC:25.0 not CC:0.0
not DD:0.0
X chromosome: DY:49.2 CY:50.8
F2 intercross
No. individuals: 278
No. phenotypes: 15
Percent phenotyped: 100 100 100 100 97.5 97.5 97.5 96.8 97.5 97.5 97.5 97.5
99.6 99.6 98.9
No. chromosomes: 20
Autosomes: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
X chr: X
Total markers: 109
No. markers: 6 10 4 7 4 9 5 4 6 7 7 4 6 4 4 5 4 4 4 5
Percent genotyped: 97.4
Genotypes (%):
Autosomes: DD:24.3 DC:50.6 CC:25.0 not CC:0.0
not DD:0.0
X chromosome: DY:49.2 CY:50.8
[1] "mouse_id" "sex" "pgm"
[4] "liver_wt" "gallstone_presence" "gallstone_score"
[7] "gallstone_count" "gallstone_weight" "gallstone_sandy"
[10] "gallstone_solid" "ChMC_agg" "ChMC_ind"
[13] "chol" "HDL_log" "nonHDL"
Plot 1¶
plotRF(cross1) #Plots the pairwise recombination data to understand how clean the data is.
#Clean if there are no large red spots
plot.map(cross1) #Plots the positions of the genetic markers on the chromosomes
plotMissing(cross1) #Plots the locations of missing genotypes on the chromosome map, we don't want may missing spots
gallscore <- cross1$pheno$gallstone_score
gallcount <- cross1$pheno$gallstone_count
gallweight <- cross1$pheno$gallstone_weight
chmcagg <- cross1$pheno$ChMC_agg
#Looking to see if the distribution of the data is approximately normal.
par(mfrow = c(2, 2))
qqnorm(gallscore)
qqline(gallscore)
qqnorm(gallcount)
qqline(gallcount)
qqnorm(gallweight)
qqline(gallweight)
qqnorm(chmcagg)
qqline(chmcagg)
par(mfrow = c(1,1))
#Setting up the basic settings for the main scan for all the variables.
#We are using a step size of 2 and the Haldane method to map the data.
cross1 <- calc.genoprob(cross1, step=2.0, off.end = 0.0, error.prob = 1.0e-4,
map.function = "haldane", stepwidth = "fixed")
cross1 <- sim.geno(cross1, step=2.0, off.end = 0.0, error.prob = 1.0e-4,
map.function = "haldane", stepwidth = "fixed")
#Creating a mainscan analysis for the gallscore data using 100 permutations
cross1.scan.gallscore <- scanone(cross1, pheno.col = gallscore, model = "normal", method = "em")
cross1.scan.gallscore.perm <- scanone(cross1, pheno.col = gallscore, model = "normal", method = "em", n.perm = 100)
Permutation 5
Permutation 10
Permutation 15
Permutation 20
Permutation 25
Permutation 30
Permutation 35
Permutation 40
Permutation 45
Permutation 50
Permutation 55
Permutation 60
Permutation 65
Permutation 70
Permutation 75
Permutation 80
Permutation 85
Permutation 90
Permutation 95
Permutation 100
#Plotting the gallscore main scan data with threshold lines
plot(cross1.scan.gallscore, main = "Main Scan Plot of Gallscore")
thresh <- summary(cross1.scan.gallscore.perm, alpha = c(0.37, 0.10, 0.05))
abline(h=thresh[1], col = "blue")
abline(h=thresh[2], col = "green")
abline(h=thresh[3], col = "red")
#Creating a summary of the main scan
summary(cross1.scan.gallscore, perm = cross1.scan.gallscore.perm, alpha = 0.05, pvalues = TRUE)
summary(cross1.scan.gallscore, perm = cross1.scan.gallscore.perm, alpha = 0.10, pvalues = TRUE)
summary(cross1.scan.gallscore, perm = cross1.scan.gallscore.perm, alpha = 0.37, pvalues = TRUE)
score <- find.marker(cross1, 2, 53.8)
chr pos lod pval
c2.loc52 2 53.81 3.465909 0.05
chr pos lod pval
c2.loc52 2 53.81 3.465909 0.05
chr pos lod pval
c2.loc52 2 53.81 3.465909 0.05
#Creating an effect plot for the gallscore mainscan run
effectplot(cross1, pheno.col=gallscore, mname1=score)
#Creating a mainscan analysis for the gall count data using 100 permutations
cross1.scan.gallcount <- scanone(cross1, pheno.col = gallcount, model = "normal", method = "hk") #question about pheno.col #
cross1.scan.gallcount.perm <- scanone(cross1, pheno.col = gallcount, model = "normal", method = "hk", n.perm = 100) #question about pheno.col #
Permutation 5
Permutation 10
Permutation 15
Permutation 20
Permutation 25
Permutation 30
Permutation 35
Permutation 40
Permutation 45
Permutation 50
Permutation 55
Permutation 60
Permutation 65
Permutation 70
Permutation 75
Permutation 80
Permutation 85
Permutation 90
Permutation 95
Permutation 100
#Plotting the gallscore main scan data with threshold lines
plot(cross1.scan.gallcount, main = "Main Scan Plot of Gall Count")
thresh <- summary(cross1.scan.gallcount.perm, alpha = c(0.37, 0.10, 0.05))
abline(h=thresh[1], col = "blue")
abline(h=thresh[2], col = "green")
abline(h=thresh[3], col = "red")
#Creating a summary of the main scan
summary(cross1.scan.gallcount)
chr pos lod
c1.loc12 1 18.2720 1.0358377
c2.loc54 2 55.8100 2.4818912
c3.loc12 3 22.8210 1.6860510
c4.loc56 4 62.9400 0.6648552
c5.loc8 5 22.3920 1.4779557
c6.loc6 6 7.8060 3.3167583
c7.loc40 7 57.0920 0.5287561
c8.loc58 8 60.1399 3.3838621
D9Mit24 9 55.0690 1.0149602
c10.loc36 10 39.4400 1.7501659
c11.loc14 11 17.5172 0.7970075
c12.loc16 12 21.5212 0.2576332
D13Mit19 13 20.9800 0.6285824
D14Mit98 14 7.0800 0.7944163
D15Mit13 15 1.8430 0.5275161
D16Mit125 16 28.3720 0.4506691
D17Mit39 17 45.6360 0.1730631
c18.loc40 18 44.4590 0.9625787
c19.loc14 19 17.3831 1.0240878
DXMit149 X 50.1421 0.4851280
#Creating a summary of the main scan with 95% confidence
summary(cross1.scan.gallcount, perm = cross1.scan.gallcount.perm, alpha = 0.05, pvalues = T)
chr pos lod
#Creating an effect plot for the gall count mainscan run
par(mfrow = c(1,2))
count1 <- find.marker(cross1, 6, 7.81)
effectplot(cross1, pheno.col=gallcount, mname1=count1)
count2<- find.marker(cross1, 8, 60.14)
effectplot(cross1, pheno.col=gallcount, mname1=count2)
par(mfrow = c(1,1))
cross1.scan.gallweight <- scanone(cross1, pheno.col = gallweight, model = "normal", method = "em") #question about pheno.col #
cross1.scan.gallweight.perm <- scanone(cross1, pheno.col = gallweight, model = "normal", method = "em", n.perm = 100) #question about pheno.col #
Permutation 5
Permutation 10
Permutation 15
Permutation 20
Permutation 25
Permutation 30
Permutation 35
Permutation 40
Permutation 45
Permutation 50
Permutation 55
Permutation 60
Permutation 65
Permutation 70
Permutation 75
Permutation 80
Permutation 85
Permutation 90
Permutation 95
Permutation 100
plot(cross1.scan.gallweight, main = "Main Scan Plot of Gall Weight")
thresh <- summary(cross1.scan.gallweight.perm, alpha = c(0.37, 0.10, 0.05))
abline(h=thresh[1], col = "blue")
abline(h=thresh[2], col = "green")
abline(h=thresh[3], col = "red")
summary(cross1.scan.gallweight)
summary(cross1.scan.gallweight, perm = cross1.scan.gallweight.perm, alpha = 0.05)
summary(cross1.scan.gallweight, perm = cross1.scan.gallweight.perm, alpha = 0.10)
summary(cross1.scan.gallweight, perm = cross1.scan.gallweight.perm, alpha = 0.37, pvalue = TRUE)
chr pos lod
D1Mit111 1 77.0800 0.4588458
D2Mit94 2 47.9283 1.8282845
D3Mit203 3 10.8210 2.2305869
c4.loc48 4 54.9400 1.4608689
c5.loc14 5 28.3920 0.2794237
c6.loc6 6 7.8060 2.3989050
c7.loc32 7 49.0920 0.4237640
D8Mit88 8 61.6553 2.5305306
D9Mit58 9 2.9329 1.3151057
D10Mit14 10 66.7460 0.9105951
c11.loc56 11 59.5172 0.4367384
D12Mit7 12 52.8018 0.3695158
D13Mit19 13 20.9800 0.5646876
D14Mit98 14 7.0800 1.3039561
D15Mit13 15 1.8430 0.5238677
c16.loc28 16 30.3314 1.5196316
D17Mit51 17 19.7377 0.4222457
D18Mit12 18 19.2935 0.4372503
D19Mit68 19 3.3831 0.5713871
cX.loc10 X 30.5861 0.3452822
chr pos lod
chr pos lod
chr pos lod pval
D8Mit88 8 61.6553 2.530531 0.3
weight <- find.marker(cross1, 8, 61.7)
effectplot(cross1, pheno.col=gallweight, mname1=weight)
cross1.scan.chmcagg <- scanone(cross1, pheno.col = chmcagg, model = "normal", method = "em") #question about pheno.col #
cross1.scan.chmcagg.perm <- scanone(cross1, pheno.col = chmcagg, model = "normal", method = "em", n.perm = 100) #question about pheno.col #
Permutation 5
Permutation 10
Permutation 15
Permutation 20
Permutation 25
Permutation 30
Permutation 35
Permutation 40
Permutation 45
Permutation 50
Permutation 55
Permutation 60
Permutation 65
Permutation 70
Permutation 75
Permutation 80
Permutation 85
Permutation 90
Permutation 95
Permutation 100
plot(cross1.scan.chmcagg, main = "Main Scan Plot of Severity of Cholesterol Monohydrate Crystals, aggregates")
thresh <- summary(cross1.scan.chmcagg.perm, alpha = c(0.37, 0.10, 0.05))
abline(h=thresh[1], col = "blue")
abline(h=thresh[2], col = "green")
abline(h=thresh[3], col = "red")
summary(cross1.scan.chmcagg)
summary(cross1.scan.chmcagg, perm = cross1.scan.chmcagg.perm, alpha = 0.05, pvalues = T)
chr pos lod
c1.loc50 1 56.2720 1.8988937
c2.loc10 2 11.8100 2.7450925
D3Mit203 3 10.8210 0.6221937
c4.loc62 4 68.9400 1.0033607
c5.loc26 5 40.3920 0.7538688
c6.loc56 6 57.8060 3.7181813
c7.loc6 7 23.0920 1.8590810
D8Mit292 8 21.1590 0.6659462
c9.loc16 9 18.9329 1.2202756
D10Mit31 10 35.2646 1.3198858
c11.loc66 11 69.5172 1.9083007
D12Mit182 12 5.5212 1.0605595
D13Mit171 13 64.2300 1.9139606
c14.loc10 14 17.0800 0.9177335
c15.loc28 15 29.8430 0.7208705
c16.loc36 16 38.3314 1.5445545
c17.loc8 17 10.6828 0.4379050
D18Mit48 18 52.1500 0.5528384
c19.loc22 19 25.3831 1.7179564
cX.loc34 X 54.5861 0.1416067
chr pos lod
chol <- find.marker(cross1, 6, 57.8)
effectplot(cross1, pheno.col=chmcagg, mname1=chol)
augmented <- mqmaugment(cross1, minprob=0.1, verbose = TRUE)
INFO: Received a valid cross file type: f2 .
INFO: Number of individuals: 278 .
INFO: Number of chr: 19 .
INFO: Number of markers: 104 .
INFO: VALGRIND MEMORY DEBUG BARRIERE TRIGGERED
INFO: Done with augmentation
# Unique individuals before augmentation:278
# Unique selected individuals:278
# Marker p individual:104
# Individuals after augmentation:1343
INFO: Data augmentation succesfull
INFO: DATA-Augmentation took: 1.281 seconds
par(mfrow=c(1,2))
geno.image(cross1)
geno.image(augmented)
summary(cross1)
summary(augmented)
ex <- mqmscan(augmented)
ex1 <- scanone(cross1)
max(ex)
max(ex1)
real_markers <- mqmextractmarkers(ex)
find.marker(augmented, chr=18, pos=5)
markers2 <-mqmextractmarkers(ex1)
find.marker(cross1, chr=18, pos=8.46)
multitoset <- find.markerindex(augmented, "D18Mit64")
setcofactors <- mqmsetcofactors(augmented, cofactors=multitoset)
mqm_co1 <- mqmscan(augmented, setcofactors)
F2 intercross
No. individuals: 278
No. phenotypes: 15
Percent phenotyped: 100 100 100 100 97.5 97.5 97.5 96.8 97.5 97.5 97.5 97.5
99.6 99.6 98.9
No. chromosomes: 20
Autosomes: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
X chr: X
Total markers: 109
No. markers: 6 10 4 7 4 9 5 4 6 7 7 4 6 4 4 5 4 4 4 5
Percent genotyped: 97.4
Genotypes (%):
Autosomes: DD:24.3 DC:50.6 CC:25.0 not CC:0.0
not DD:0.0
X chromosome: DY:49.2 CY:50.8
F2 intercross
No. individuals: 1343
No. phenotypes: 15
Percent phenotyped: 100 100 100 100 97.7 97.7 97.7 97.3 97.7 97.7 97.7 97.7
99.9 99.9 98.4
No. chromosomes: 19
Autosomes: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
Total markers: 104
No. markers: 6 10 4 7 4 9 5 4 6 7 7 4 6 4 4 5 4 4 4
Percent genotyped: 100
Genotypes (%): DD:24.1 DC:51.5 CC:24.5 not CC:0.0 not DD:0.0
chr pos (cM) LOD mouse_id info LOD*info
c18.loc5 18 5 2.379694 0.728202 1.732898
chr pos lod
c18.loc4 18 8.459 3.257336
[1] "D18Mit64"
[1] "D18Mit64"
heatmap(cor(cross1$pheno[,4:15], use="complete.obs"))
par(mfrow = c(1,1))
plot(ex, col = "red")
plot(ex1, add=TRUE, col = "green")
plot(mqm_co1, add= TRUE, col = "blue")
data <- read.csv("LyonsNoQTL.csv", header = TRUE)
summary(data)
data[] <- lapply(data, as.numeric)
summary(data)
data <- na.omit(data)
corr <- round(cor(data), 1)
corrchart <-
ggcorrplot(corr, hc.order = TRUE,
type = "lower",
lab = TRUE,
lab_size = 3,
method="circle",
colors = c("tomato2", "white", "springgreen3"),
title="Correlogram of Factors for Gallstone Suscebtibility",
ggtheme=theme_bw)
corrchart
pgm liver_wt gallstone_presence gallstone_score
Min. :0.0000 Min. :0.9225 Length:278 Length:278
1st Qu.:0.0000 1st Qu.:1.3244 Class :character Class :character
Median :1.0000 Median :1.5524 Mode :character Mode :character
Mean :0.7266 Mean :1.6115
3rd Qu.:1.0000 3rd Qu.:1.8027
Max. :1.0000 Max. :3.2405
gallstone_count gallstone_weight gallstone_sandy gallstone_solid
Length:278 Length:278 Length:278 Length:278
Class :character Class :character Class :character Class :character
Mode :character Mode :character Mode :character Mode :character
ChMC_agg ChMC_ind chol HDL_log
Length:278 Length:278 Length:278 Length:278
Class :character Class :character Class :character Class :character
Mode :character Mode :character Mode :character Mode :character
nonHDL
Length:278
Class :character
Mode :character
pgm liver_wt gallstone_presence gallstone_score
Min. :0.0000 Min. :0.9225 Min. :0.0000 Min. :0.000
1st Qu.:0.0000 1st Qu.:1.3244 1st Qu.:0.0000 1st Qu.:0.000
Median :1.0000 Median :1.5524 Median :1.0000 Median :2.000
Mean :0.7266 Mean :1.6115 Mean :0.6015 Mean :1.288
3rd Qu.:1.0000 3rd Qu.:1.8027 3rd Qu.:1.0000 3rd Qu.:2.000
Max. :1.0000 Max. :3.2405 Max. :1.0000 Max. :2.000
NA's :7 NA's :7
gallstone_count gallstone_weight gallstone_sandy gallstone_solid
Min. : 0.000 Min. :0.0000 Min. :0.000 Min. :0.0000
1st Qu.: 0.000 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000
Median : 0.000 Median :0.0000 Median :1.000 Median :0.0000
Mean : 1.129 Mean :0.1508 Mean :1.443 Mean :0.6494
3rd Qu.: 0.000 3rd Qu.:0.0000 3rd Qu.:3.000 3rd Qu.:0.0000
Max. :28.000 Max. :3.1600 Max. :4.000 Max. :4.0000
NA's :7 NA's :9 NA's :7 NA's :7
ChMC_agg ChMC_ind chol HDL_log
Min. :0.000 Min. :0.0000 Min. : 67.0 Min. :1.079
1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:185.0 1st Qu.:1.806
Median :1.000 Median :0.0000 Median :223.0 Median :1.892
Mean :1.601 Mean :0.7232 Mean :243.9 Mean :1.901
3rd Qu.:3.000 3rd Qu.:1.0000 3rd Qu.:285.0 3rd Qu.:1.991
Max. :4.000 Max. :4.0000 Max. :668.0 Max. :2.455
NA's :7 NA's :7 NA's :1 NA's :1
nonHDL
Min. : 8.0
1st Qu.:102.0
Median :137.0
Mean :158.0
3rd Qu.:194.5
Max. :631.0
NA's :3