Skip to content

Latest commit

 

History

History
1041 lines (843 loc) · 36.2 KB

basil-genetics.md

File metadata and controls

1041 lines (843 loc) · 36.2 KB

Genetic Mapping and QTL Analysis

Load packages

library(qtl)
library(ASMap)
library(dplyr)
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.2.3
library(LinkageMapView)

Import the genotypic data. For now, all markers are placed on a single chromosome.

# Load the cross from raw_data.csv in the working directory ("csvr" format),
# with genotype codes a / h / b and the Kosambi map function.
data <- read.cross(
  format = "csvr",
  dir = ".",
  file = "raw_data.csv",
  genotypes = c("a", "h", "b"),
  map.function = "kosambi"
)
##  --Read the following data:
##   152  individuals
##   791  markers
##   14  phenotypes

## Warning in summary.cross(cross): Some chromosomes > 1000 cM in length; there may be a problem with the genetic map.
##   (Perhaps it is in basepairs?)

##  --Cross type: f2

Convert cross to bcsft type f2 to use ASMap package

# Convert the cross with F.gen=2; the map is not re-estimated at this point
# (estimate.map=F).
data<-convert2bcsft(data,F.gen=2,estimate.map=F)
cat("Phenos:")
## Phenos:
# Show every phenotype name except the last one, as a one-column matrix.
cbind(head(phenames(data),-1))
##       [,1]      
##  [1,] "AFF"     
##  [2,] "AFS"     
##  [3,] "AFL"     
##  [4,] "AG"      
##  [5,] "AYS"     
##  [6,] "AYL"     
##  [7,] "FOB1"    
##  [8,] "FOB2"    
##  [9,] "FOB3"    
## [10,] "F_AUDPC" 
## [11,] "Cold"    
## [12,] "FOB3_bin"
## [13,] "BDM"
n<-11 #normal phenotypes count

Data Pre-processing

Look at the pattern of missing data. Black pixels indicate missing genotypes.

plotMissing(data)

Omit individuals that are missing genotypes at more than 85% of the markers (i.e. keep only individuals with fewer than 672 missing markers). Briefly estimate the map first.

# Quick Kosambi map estimate, then genome-wide (not per-chromosome)
# missing-genotype counts for every individual.
data <- quickEst(data, map.function = "kosambi")
sg <- statGen(data, bychr = FALSE, stat.type = "miss", id = "index")

# An individual is kept when its missing count is below 85% of all markers.
miss_limit <- totmar(data) * 0.85
om <- sg$miss < miss_limit
data1 <- subset(data, ind = om)

# Report the positions of the dropped individuals, or "no" when none were dropped.
dropped <- if (FALSE %in% om) which(!om) else "no"
cat(dropped, "ind omitted for missing > 85% mar")
## no ind omitted for missing > 85% mar

Plot the number of genotyped markers per individual as well as the number of genotyped individuals per marker

# Two side-by-side panels: (A) typed markers per individual and
# (B) typed individuals per marker.
par(mfrow = c(1, 2), las = 1)

typed_per_ind <- ntyped(data1)
plot(
  typed_per_ind,
  ylim = c(0, totmar(data1) + 50),
  ylab = "No. typed markers",
  main = "Markers by Individual"
)
mtext("A", adj = 0)

typed_per_mar <- ntyped(data1, "mar")
plot(
  typed_per_mar,
  ylim = c(0, nind(data1) + 15),
  ylab = "No. typed individuals",
  main = "Individuals by Marker"
)
mtext("B", adj = 0)

Plot the genotype frequencies per individual

# Per-individual genotype frequencies: count codes 1-3 for each individual
# (one column per individual), then divide every column by its total.
g <- pull.geno(data1)
gfreq <- apply(g, 1, function(a) table(factor(a, levels = 1:3)))
gfreq <- sweep(gfreq, 2, colSums(gfreq), "/")

# One panel per genotype class, with the mean frequency as a dotted red line.
par(mfrow = c(1, 3), las = 1)
geno_labels <- c("AA", "AB", "BB")
for (i in seq_along(geno_labels)) {
  freq_i <- gfreq[i, ]
  plot(freq_i, ylab = "Genotype frequency", ylim = c(0, 1))
  abline(h = mean(freq_i), lty = 3, col = "red", lwd = 3)
  mtext(geno_labels[i])
}
par(mfrow = c(1, 1))
title("Genotypes' Frequency and Segregation Ratio", line = 2.5)

Compare the genotypes for all pairs of individuals

# Pairwise genotype comparison between individuals; only the lower triangle
# is plotted so that each pair is counted once.
cg <- comparegeno(data1)
cgr <- cg[lower.tri(cg)]
hist(
  cgr,
  breaks = seq(0, 1, length.out = 101),
  xlab = "No. matching genotypes",
  main = "Matching Pairs of Individuals"
)
rug(cgr)

# Mark the most similar pair with a red arrow and label it as a percentage.
x <- max(cgr)
arrow.plot(x, 50, 0, -1, true.angle = TRUE, arrow.ex = 30, length = 0.1, col = "red", lwd = 2)
pct_label <- paste0(round(100 * x, 1), "%")
text(x, 70, pct_label, adj = c(0.5, 0.2))

Omit individuals with more than 90% identical markers

# Positions in the comparison matrix where two individuals match above 0.9.
# The column indices of those hits are the individuals that get removed.
wh <- which(cg > 0.9, arr.ind = TRUE)
dup_cols <- wh[, 2]
data2 <- subset(data1, ind = -dup_cols)

n_removed <- nind(data1) - nind(data2)
cat(
  n_removed, "ind omitted for", paste0(round(100 * x, 1), "%"), "identical geno\n",
  paste0("#", data1$pheno$index[dup_cols])
)
## 2 ind omitted for 96.9% identical geno
##  #132 #134

Pull out markers from cross temporarily

# Pull markers out of the cross in three successive steps, printing the marker
# count after each step and how many markers that step removed.
cat(totmar(data2),'total mar\n')
## 791 total mar
# Step 1: pull by missingness (miss.thresh = 0.1).
data3<-pullCross(data2,type="missing",pars=list(miss.thresh=0.1))
cat(totmar(data3),'total mar\n')
## 760 total mar
cat(totmar(data2)-totmar(data3),"mar pulled for missing\n")
## 31 mar pulled for missing
# Step 2: pull by segregation distortion (seg.thresh = 0.001).
data4<-pullCross(data3,type="seg.distortion",pars=list(seg.thresh=0.001))
cat(totmar(data4),'total mar\n')
## 726 total mar
cat(totmar(data3)-totmar(data4),"mar pulled for seg. distortion\n")
## 34 mar pulled for seg. distortion
# Step 3: pull co-located markers; these are pushed back later with pushCross().
data5<-pullCross(data4,type="co.located")
cat(totmar(data5),'total mar\n')
## 567 total mar
cat(totmar(data4)-totmar(data5),"mar pulled for co. located")
## 159 mar pulled for co. located
## 159 mar pulled for co. located

Plot candidate p-values to choose the distance threshold for marker clustering. I chose a p-value of 1e-7 (y axis), which means linkage groups are split where the gap between markers exceeds about 30 cM for a population of 150 individuals (x axis).

# Print the number of individuals left in data5.
cat(nind(data5),"individuals are used to cluster markers")
## 150 individuals are used to cluster markers
# Plot pValue() for dist values 25 to 40 in steps of 5 and population sizes 110 to 190.
pValue(dist=seq(25,40,by=5),pop.size=110:190)

LOD(Logarithm of the Odds)= statistical measure of the likelihood that two loci (positions on a chromosome) are linked and therefore inherited together, rather than assorting independently.

Map Construction

Form linkage groups with LOD=7, raw map

# Build the raw map over all markers at once (bychr=F) with p.value = 1e-7;
# data5 is overwritten with the result.
data5<-mstmap(data5,bychr=F,p.value=1e-7,id='index')
## Number of linkage groups: 28
## The size of the linkage groups are: 39   32  47  33  27  12  19  10  38  18  28  19  27  8   11  40  11  13  24  17  16  20  17  1   21  1   16  2   
## The number of bins in each linkage group: 34 23  35  26  20  9   13  8   30  14  25  15  20  8   10  35  11  11  18  14  14  13  17  1   20  1   12  2   
plotMap(data5,alternate.chrid = T)

Profile individuals’ genotype statistics

Omit missing/ double crossover (dxo)/ xo statistics outlier individuals

# NOTE(review): `pg` is not defined in the code shown here; presumably it is
# the result of an earlier profileGen() call on data5 — confirm.
# Keep only the individuals for which pg$xo.lambda is FALSE.
data6<-subsetCross(data5,ind=!pg$xo.lambda)
# Number of individuals removed by the step above.
cat(nind(data5)-nind(data6),"ind omitted by profileGen")
## 1 ind omitted by profileGen

Double check dxo. An unusually high rate of double crossovers might indicate genotyping errors

# Profile crossover, double-crossover and missing-value statistics per individual.
# NOTE(review): xo.lambda is computed from pg1$stat$xo, so `pg1` must already
# exist before this call; no earlier assignment is visible here — confirm.
pg1<-profileGen(data6,bychr=F,stat.type=c("xo","dxo","miss"),id="index",xo.lambda=median(pg1$stat$xo),layout=c(1,3),lty=2,cex=0.7)

Re-construct the map, since genotyping errors can distort the distances between markers. The order of the input markers is respected.

# Rebuild the map from the filtered individuals with anchor=T, using the same
# p.value (1e-7) as the first mstmap() run.
data7<-mstmap(data6,bychr=F,anchor=T,p.value=1e-7,id='index')
## Number of linkage groups: 28
## The size of the linkage groups are: 39   18  28  19  27  8   11  40  11  13  24  32  17  16  20  17  1   21  1   16  2   47  33  27  12  19  10  38  
## The number of bins in each linkage group: 33 14  25  15  20  8   10  35  11  11  17  22  14  13  13  16  1   20  1   12  2   35  26  19  9   13  8   30  

Push back markers to the map

# Marker count before pushing.
cat(totmar(data7),'total mar ')
## 567 total mar
# Push the co-located markers (pulled earlier with pullCross) back into the cross.
data8<-pushCross(data7,type="co.located")
cat(totmar(data8),'total mar ')
## 726 total mar
# Number of markers restored by the push.
cat(totmar(data8)-totmar(data7),"mar pushed for co. located")
## 159 mar pushed for co. located

Re-construct final map by adding markers to existing LGs (linkage groups)

Drop LGs with less than 2 markers

# NOTE(review): `data9` is not created in the code shown here (the previous
# step produces data8) — confirm where it is built.
# Names of the markers on linkage groups that hold fewer than 2 markers.
mndrop<-markernames(data9,nmar(data9)<2)
# Remove those markers and report the remaining and removed counts.
data10<-drop.markers(data9,as.character(mndrop))
cat(totmar(data10),'total mar')
## 724 total mar
cat(totmar(data9)-totmar(data10),"mar omitted for LG < 2 mar")
## 2 mar omitted for LG < 2 mar

Rename chr by numerical order

# Relabel the linkage groups LG1, LG2, ... following their current order.
x <- seq_len(nchr(data10))
names(data10$geno)[x] <- paste0("LG", x)

Plot genetic map illustration, final map

plot.map(data10,alternate.chrid=T)

LGs summary table

summaryMap(data10)
##         n.mar length ave.spacing max.spacing
## LG1        66   58.5         0.9        12.8
## LG2        13   34.4         2.9        14.6
## LG3        24   22.3         1.0         4.7
## LG4        33   64.0         2.0        11.5
## LG5        22   29.9         1.4         6.3
## LG6        23   36.1         1.6        15.2
## LG7        20   33.8         1.8        21.9
## LG8        26   48.9         2.0        11.2
## LG9        29   52.6         1.9        17.0
## LG10       18   43.9         2.6         8.9
## LG11       16   56.6         3.8        25.6
## LG12        2    1.9         1.9         1.9
## LG13       67   56.7         0.9         9.9
## LG14       34   58.7         1.8        10.8
## LG15       47   20.8         0.5         4.5
## LG16       12   41.1         3.7        18.5
## LG17       19   46.6         2.6        10.4
## LG18       10   15.7         1.7         8.5
## LG19       53  114.3         2.2        23.2
## LG20       44   60.7         1.4        14.4
## LG21       19   40.9         2.3        19.6
## LG22       27   77.1         3.0        14.7
## LG23        8   14.9         2.1         4.1
## LG24       11   49.3         4.9        26.9
## LG25       70   47.8         0.7         6.5
## LG26       11   86.3         8.6        28.8
## overall   724 1213.7         1.7        28.8

Map Evaluation

Estimate recombination fraction

data10<-est.rf(data10)

Heatmap of LOD and Rf

heatMap(data10,lmax = 70,main='')
mtext("Pairwise Recombination Fractions and LOD Scores",cex=1.1,line=3.2,adj=0.4,font=2)

Plot Pairwise LOD vs. Rf

# Pairwise recombination fractions and the matching LOD scores, flattened to
# plain vectors and plotted against each other.
rf <- pull.rf(data10)
lod <- pull.rf(data10, what = "lod")
plot_title <- paste("Pairwise LOD vs. Rf for", totmar(data10), "Markers")
plot(
  as.numeric(rf),
  as.numeric(lod),
  xlab = "Recombination fraction",
  ylab = "LOD score",
  main = plot_title
)

The evaluation looks good:

The heatmap is continuous, with a low gradient along each chromosome, meaning that markers within a chromosome become gradually more distant from each other.

On the LOD/rf scatter plot there is a trend line with no outliers.

Exploratory Data Analysis

Jitter the map to avoid overlapping markers by adding slight gaps between them.

# From here on `data` is rebound to the jittered final map (data10); it no
# longer refers to the raw cross read at the top of the file.
data<-jittermap(data10)

Create a df of parents phenotype

# Parental phenotype table: 24 rows (P1_1..P1_12, DP_1..DP_12) and 11 columns
# named after the first n phenotypes of the cross.
cold_values <- c(5, 5, 4, 6, 7, 9, 5, 6, 6, 7, NA, NA, 7, 8, 7, 8, 8, 8, 9, 9, 9, 8, NA, NA)
faudpc_values <- c(rep(0, 12), rep(c(90, 142), each = 6))
ant_values <- c(5, 4.5, 4.5, 4.5, 4.5, 3.5, 4, 4.5, 4, 4.5, 4, 4)

parent_ids <- c(paste0("P1_", 1:12), paste0("DP_", 1:12))
# First nine columns: 0 for the first twelve rows, 5 for the last twelve.
score_block <- rbind(matrix(0, 12, 9), matrix(5, 12, 9))
parents <- data.frame(
  score_block,
  faudpc_values,
  Cold = cold_values,
  row.names = parent_ids
)
# Rows 13-24 get the same twelve ant_values in each of columns 1-3.
parents[13:24, 1:3] <- matrix(ant_values, 12, 3)
colnames(parents) <- phenames(data)[1:n]

Check the phenotypic distribution

Plot histograms or barplots

cbind(phenames(data)) 
##       [,1]      
##  [1,] "AFF"     
##  [2,] "AFS"     
##  [3,] "AFL"     
##  [4,] "AG"      
##  [5,] "AYS"     
##  [6,] "AYL"     
##  [7,] "FOB1"    
##  [8,] "FOB2"    
##  [9,] "FOB3"    
## [10,] "F_AUDPC" 
## [11,] "Cold"    
## [12,] "FOB3_bin"
## [13,] "BDM"     
## [14,] "index"

Plot the correlation matrices using corrplot

# Correlation of phenotypes 1-13, reordered so that column 11 comes last.
# use = "complete.obs" keeps only individuals with no missing value in any
# of the selected phenotypes.
mat<-cor(pull.pheno(data,c(1:10,12,13,11)),use = "complete.obs")
corrplot(mat,type = 'upper',method = "color", addCoef.col = "orange", tl.col = "black", tl.srt = 35,tl.cex=0.7,number.cex=0.5) 

Now, for each group of phenotypes

Fusarium

# Correlation among phenotype columns 7-10 and 12, complete observations only.
mat<-cor(pull.pheno(data,c(7:10,12)),use = "complete.obs")
corrplot(mat, type='upper',method = "color", addCoef.col = "white", tl.col = "black", tl.srt = 35)

Anthocyanin

# Correlation among phenotype columns 1-6, complete observations only.
mat<-cor(pull.pheno(data,1:6),use = "complete.obs")
corrplot(mat, type='upper',method = "color", addCoef.col = "white", tl.col = "black", tl.srt = 35)

There is a strong correlation between the phenotypes within the same group. The different conditions (in anthocyanin) or repetitions (in Fusarium) had a limited effect on the resulting phenotype.

QTL Analysis

Perform genome scans to identify QTL

## Loading precomputed genome scans from genome_scans.Rdata, scan2_part1.Rdata and scan2_lod_part2.Rdata to save time.

Set the QTL detection thresholds from permutation tests. The lower the percentage (e.g. 5%), the more significant the QTL.

# Permutation LOD thresholds at alpha = 0.63, 0.1 and 0.05 for the scan1perm
# phenotypes; the outer parentheses print the result as it is assigned.
(thresh1.hk<-summary(scan1perm,alpha=c(0.63,0.1,0.05))) 
## LOD thresholds (1000 permutations)
##      AFF  AFS  AFL   AG  AYS  AYL FOB1 FOB2 FOB3 F_AUDPC Cold
## 63% 2.24 2.24 2.22 2.21 2.24 2.22 2.24 2.22 2.25    2.24 2.26
## 10% 3.30 3.29 3.28 3.26 3.31 3.35 3.40 3.38 3.28    3.36 3.33
## 5%  3.66 3.65 3.66 3.63 3.66 3.65 3.71 3.74 3.64    3.75 3.67
# Same three alpha levels for the scan1perm.bin permutations; printed on assignment.
(thresh2.em<-summary(scan1perm.bin,alpha=c(0.63,0.1,0.05)))
## LOD thresholds (1000 permutations)
##     FOB3_bin  BDM
## 63%     2.21 2.26
## 10%     3.29 3.40
## 5%      3.54 3.88

Scan for additional QTL after reducing the masking effect of QTL with major peaks.

Threshold colors

# One colour per threshold row, in the order the thresholds are computed
# (63%, 10%, 5%).
thcol<-c('blue','green','red')

Plot LOD curves per phenotype, QTL peaks

# Draw one LOD profile together with its permutation thresholds.
#   scan        genome-scan result to plot
#   thresholds  threshold table (rows = alpha levels, columns = phenotypes)
#   k           LOD column / threshold column to use
#   title_text  panel title
# Thresholds are drawn as dotted lines in the `thcol` colours; a threshold is
# labelled in the right margin only when it lies below the top of the plot.
plot_lod_panel <- function(scan, thresholds, k, title_text) {
  plot(
    scan,
    lodcolumn = k,
    main = title_text,
    ylab = "LOD",
    bandcol = "gray80",
    ylim = c(0, max(scan[, 2 + k]) + 0.5),
    alternate.chrid = TRUE
  )
  abline(h = thresholds[, k], lty = "dotted", lwd = 2, col = thcol)
  for (j in 1:3) {
    y_top <- par("usr")[4]
    if (thresholds[j, k] / y_top < 1) {
      mtext(
        rownames(thresholds)[j],
        side = 4,
        font = 2,
        adj = thresholds[j, k] / (y_top - 0.2),
        col = thcol[j]
      )
    }
  }
}

# Phenotypes scanned in scan1: all phenotype names except the last three.
for (i in seq_len(length(phenames(data)) - 3)) {
  p <- phenames(data)[i]
  plot_lod_panel(scan1, thresh1.hk, i, p)
}

# The two phenotypes scanned in scan1.bin (positions n + 1 and n + 2).
for (i in 1:2) {
  p <- phenames(data)[i + n]
  plot_lod_panel(scan1.bin, thresh2.em, i, p)
}

The add QTL scan found nothing

# Assemble the QTL summary tables.
#   qtldf    : peaks from the primary scans only (scan1, then scan1.bin)
#   qtldf.aq : primary peaks plus the peaks from the additional-QTL scans
#              (out.aq / out.aq.bin)
# NOTE(review): out.aq, out.aq.bin, rqtl and rqtl.bin are not created in the
# code shown here; presumably they come from the precomputed .Rdata files —
# confirm.
#normal
# Per-phenotype peak tables at alpha = 0.95 with Bayes intervals and p-values.
qtlist<-summary(scan1,perms=scan1perm,format="tabByCol",alpha=0.95,ci.function="bayesint",pvalues=T)
  data<-calc.genoprob(data,2,map.function="kosambi")
  qtlist.aq<-list()
  s.aq<-list()
  for (i in 1:n){
    p<-phenames(data)[i]
    # Only phenotypes that have an additional-QTL scan are processed.
    if(!is.null(out.aq[[p]])){
      s<-summary(out.aq[[p]],format="tabByCol",perms=scan1perm[,p],alpha=0.95,ci.function="bayesint",pvalues=T)
      if(nrow(s[[1]])>0){
        # Store the additional-QTL table under the phenotype name and prepend a Trait column.
        qtlist.aq[p]<-s
        qtlist.aq[[p]]<-cbind.data.frame(Trait=p,qtlist.aq[[p]])
        # Peaks at the stricter alpha = 0.63 are the ones added to the QTL object.
        s.aq[[p]]<-summary(out.aq[[p]],perms=scan1perm[,p],alpha=0.63)
        if(nrow(s.aq[[p]])>0){
          rqtl[[p]]<-addtoqtl(data,rqtl[[p]],s.aq[[p]][,1],s.aq[[p]][,2])
        }
      }
    }
  }
  # Prepend a Trait column to every primary-scan table that does not have one yet.
  for (i in 1:length(qtlist)){
    if(colnames(qtlist[[i]])[1]!="Trait"){
      qtlist[[i]]<-cbind.data.frame(Trait=names(qtlist[i]),qtlist[[i]])
    }
  }
  qtldf<-do.call(rbind.data.frame,c(qtlist,make.row.names=F))
  
  # Append the additional-QTL rows to the matching primary table.
  # NOTE(review): the lookup uses phenames(data)[i], which assumes qtlist is in
  # the same order as the phenotype names — confirm.
  for (i in 1:n){
    if(names(qtlist[i])%in%names(qtlist.aq)){
      qtlist[[i]]<-rbind(qtlist[[i]],qtlist.aq[[phenames(data)[i]]])
    }
  }
  qtldf.aq<-do.call(rbind.data.frame,c(qtlist,make.row.names=F))
  
  #binary
  # Same procedure for the two phenotypes scanned in scan1.bin; qtlist is
  # reused, while qtlist.aq and s.aq keep accumulating.
  qtlist<-summary(scan1.bin,perms=scan1perm.bin,format="tabByCol",alpha=0.95,ci.function="bayesint",pvalues=T)
  data<-calc.genoprob(data,3,map.function="kosambi")
  for (i in 1:2){
    p<-phenames(data)[i+n]
    if(!is.null(out.aq.bin[[p]])){
      s<-summary(out.aq.bin[[p]],perms=scan1perm.bin[,p],alpha=0.95,format="tabByCol",ci.function="bayesint",pvalues=T)
      if(nrow(s[[1]])>0){
        qtlist.aq[p]<-s
        qtlist.aq[[p]]<-cbind.data.frame(Trait=p,qtlist.aq[[p]])
        s.aq[[p]]<-summary(out.aq.bin[[p]],perms=scan1perm.bin[,p],alpha=0.63)
        if(nrow(s.aq[[p]])>0){
          rqtl.bin[[p]]<-addtoqtl(data,rqtl.bin[[p]],s.aq[[p]][,1],s.aq[[p]][,2])
        }
      }
    }
  }
  for (i in 1:length(qtlist)){
    qtlist[[i]]<-cbind.data.frame(Trait=names(qtlist[i]),qtlist[[i]])
  }
  # Extend the primary-only table with the binary-scan peaks.
  for (i in 1:length(qtlist)){
    qtldf<-rbind.data.frame(qtldf,qtlist[[i]],make.row.names=F)
  }
  for (i in 1:2){
    if(names(qtlist[i])%in%names(qtlist.aq)){
      qtlist[[i]]<-rbind(qtlist[[i]],qtlist.aq[[phenames(data)[i+n]]])
    }
  }
  # Stack the normal and binary tables into the final combined table.
  qtldf.aq.bin<-do.call(rbind.data.frame,c(qtlist,make.row.names=F))
  qtldf.aq<-rbind(qtldf.aq,qtldf.aq.bin)

QTL summary as a data frame for the final report. QTL are listed at alpha = 0.95, with significance marked by *, ** and *** (p < 0.63, 0.1 and 0.05, respectively).

# Format the combined QTL table (qtldf.aq) for the final report.
# su: fraction of individuals with a recorded value, per phenotype.
su <- 1 - summary(data)$missing.phe
qtldf.aq <- qtldf.aq %>%
  # Look the LG length up by NAME. `chr` can be a factor, and indexing with a
  # factor uses its integer codes, which only match the LG order by
  # coincidence.
  mutate("Len of LG" = round(chrlen(data)[as.character(chr)], 1), .after = chr) %>%
  # Width of the Bayes interval.
  mutate("Len of QTL" = round(ci.high - ci.low, 1), .after = "Len of LG") %>%
  # Markers nearest to the interval ends and to the peak position.
  mutate(
    "Flanking markers" = paste0(
      chr, "_m", find.marker(data, chr, ci.low), "-",
      chr, "_m", find.marker(data, chr, ci.high)
    )
  ) %>%
  mutate("Central marker" = paste0(chr, "_m", find.marker(data, chr, pos))) %>%
  # One star per threshold passed: p < 0.63, p < 0.1, p < 0.05.
  mutate(
    "Pval" = paste0(
      pval,
      if_else(pval < 0.63, "*", ""),
      if_else(pval < 0.1, "*", ""),
      if_else(pval < 0.05, "*", "")
    )
  ) %>%
  select(!c(pval, ci.low, ci.high)) %>%
  rename("QTL's LG" = chr) %>%
  # Number and percentage of individuals phenotyped for the trait.
  mutate(
    "No. Inds/% phenotyped" = paste0(
      nind(data) * su[find.pheno(data, Trait)], " ind / ",
      round(100 * su[find.pheno(data, Trait)], 1), "%"
    ),
    .after = Trait
  ) %>%
  mutate("pos" = round(pos, 1)) %>%
  mutate("lod" = round(lod, 1))
qtldf.aq
##       Trait No. Inds/% phenotyped QTL's LG Len of LG Len of QTL  pos lod
## 1       AFF        149 ind / 100%     LG13      56.7       28.6 56.3 2.8
## 2       AFF        149 ind / 100%     LG14      58.7       57.2 12.0 2.6
## 3       AFF        149 ind / 100%     LG16      41.1       41.1 32.9 1.9
## 4       AFF        149 ind / 100%     LG17      46.6       32.0  1.1 1.9
## 5       AFF        149 ind / 100%     LG19     114.3       80.0 34.2 2.5
## 6       AFF        149 ind / 100%     LG20      60.7       26.0 18.0 2.9
## 7       AFF        149 ind / 100%     LG22      77.1       77.1 77.1 1.8
## 8       AFF        149 ind / 100%     LG24      49.3       22.0 31.7 3.1
## 9       AFS        149 ind / 100%     LG13      56.7       45.1 56.7 2.3
## 10      AFS        149 ind / 100%     LG14      58.7       52.4 12.0 2.7
## 11      AFS        149 ind / 100%     LG17      46.6       28.0  1.1 2.1
## 12      AFS        149 ind / 100%     LG19     114.3       82.0 34.2 2.5
## 13      AFS        149 ind / 100%     LG20      60.7       26.0 24.0 3.1
## 14      AFS        149 ind / 100%     LG24      49.3       22.0 31.7 3.3
## 15      AFS        149 ind / 100%     LG26      86.3       86.3  0.0 2.0
## 16      AFL        149 ind / 100%     LG13      56.7       17.1 56.3 2.8
## 17      AFL        149 ind / 100%     LG14      58.7       18.0  4.0 2.7
## 18      AFL        149 ind / 100%     LG16      41.1       15.1 32.9 2.3
## 19      AFL        149 ind / 100%     LG17      46.6       46.0  1.1 1.9
## 20      AFL        149 ind / 100%     LG19     114.3       80.8 36.0 2.5
## 21      AFL        149 ind / 100%     LG20      60.7       26.0 18.0 2.7
## 22      AFL        149 ind / 100%     LG24      49.3       22.0 31.7 2.8
## 23       AG        149 ind / 100%      LG2      34.4       22.4 31.4 2.0
## 24       AG        149 ind / 100%     LG13      56.7       41.7 56.3 2.3
## 25       AG        149 ind / 100%     LG14      58.7       58.0  4.0 1.9
## 26       AG        149 ind / 100%     LG16      41.1       41.1 32.9 2.2
## 27       AG        149 ind / 100%     LG19     114.3       48.0 15.9 2.5
## 28       AG        149 ind / 100%     LG20      60.7       26.0 24.0 2.4
## 29      AYS        149 ind / 100%     LG13      56.7       45.1 56.3 2.1
## 30      AYS        149 ind / 100%     LG16      41.1       41.1 34.0 1.9
## 31      AYS        149 ind / 100%     LG19     114.3       79.6 40.0 2.4
## 32      AYL        149 ind / 100%     LG13      56.7       56.7 56.3 1.8
## 33      AYL        149 ind / 100%     LG14      58.7       57.2 12.8 2.2
## 34      AYL        149 ind / 100%     LG16      41.1       17.1 34.6 1.9
## 35      AYL        149 ind / 100%     LG19     114.3       88.0 38.0 2.4
## 36     FOB1       117 ind / 78.5%      LG4      64.0       28.0  2.0 2.1
## 37     FOB1       117 ind / 78.5%     LG25      47.8       34.0  7.9 1.9
## 38     FOB2       118 ind / 79.2%     LG19     114.3       94.0 15.9 1.9
## 39     FOB2       118 ind / 79.2%     LG25      47.8       40.0  6.6 2.6
## 40     FOB3       118 ind / 79.2%      LG6      36.1       24.0 24.0 2.3
## 41     FOB3       118 ind / 79.2%     LG13      56.7       40.0 17.8 2.0
## 42     FOB3       118 ind / 79.2%     LG17      46.6       22.9 16.2 2.6
## 43     FOB3       118 ind / 79.2%     LG22      77.1       65.1 56.0 1.7
## 44     FOB3       118 ind / 79.2%     LG25      47.8       32.0  6.6 3.6
## 45  F_AUDPC       118 ind / 79.2%     LG25      47.8       32.0  6.6 3.1
## 46     Cold        149 ind / 100%     LG21      40.9       26.1 24.0 2.0
## 47 FOB3_bin       118 ind / 79.2%      LG6      36.1       22.0 27.0 2.5
## 48 FOB3_bin       118 ind / 79.2%      LG9      52.6       39.0 51.0 1.8
## 49 FOB3_bin       118 ind / 79.2%     LG13      56.7       56.7 17.8 2.1
## 50 FOB3_bin       118 ind / 79.2%     LG17      46.6       27.0 16.2 1.9
## 51 FOB3_bin       118 ind / 79.2%     LG20      60.7       60.7  3.0 1.9
## 52 FOB3_bin       118 ind / 79.2%     LG22      77.1       54.0 57.0 1.8
## 53 FOB3_bin       118 ind / 79.2%     LG25      47.8       17.3  6.6 4.5
## 54 FOB3_bin       118 ind / 79.2%     LG25      47.8       22.8 21.0 3.0
## 55      BDM        149 ind / 100%      LG6      36.1       26.0 27.0 3.1
## 56      BDM        149 ind / 100%     LG13      56.7       42.0 12.4 1.8
## 57      BDM        149 ind / 100%     LG19     114.3      108.0  8.4 2.7
## 58      BDM        149 ind / 100%     LG25      47.8       23.5 14.9 2.1
## 59      BDM        149 ind / 100%      LG6      36.1       13.9 10.1 2.4
##             Flanking markers Central marker     Pval
## 1  LG13_m213796-LG13_m152483    LG13_m95830   0.232*
## 2    LG14_m187879-LG14_m3973   LG14_m232116    0.34*
## 3     LG16_m98366-LG16_m1644   LG16_m256179    0.858
## 4     LG17_m66485-LG17_m2335    LG17_m26429    0.888
## 5     LG19_m62803-LG19_m3026   LG19_m164757   0.426*
## 6   LG20_m66136-LG20_m233715    LG20_m99144   0.207*
## 7   LG22_m172088-LG22_m55365    LG22_m55365    0.931
## 8  LG24_m115022-LG24_m158249   LG24_m115022   0.144*
## 9  LG13_m239247-LG13_m152483   LG13_m152483   0.608*
## 10 LG14_m187879-LG14_m140735   LG14_m232116   0.312*
## 11  LG17_m66485-LG17_m186932    LG17_m26429    0.762
## 12   LG19_m248944-LG19_m3026   LG19_m164757   0.432*
## 13  LG20_m66136-LG20_m233715   LG20_m249499   0.156*
## 14 LG24_m115022-LG24_m158249   LG24_m115022  0.097**
## 15  LG26_m228710-LG26_m99216   LG26_m228710    0.828
## 16 LG13_m116284-LG13_m152483    LG13_m95830   0.269*
## 17  LG14_m187879-LG14_m99155    LG14_m33308   0.303*
## 18   LG16_m256633-LG16_m1644   LG16_m256179   0.577*
## 19  LG17_m66485-LG17_m242212    LG17_m26429    0.895
## 20  LG19_m255575-LG19_m38508   LG19_m164757   0.423*
## 21  LG20_m66136-LG20_m233715    LG20_m99144   0.308*
## 22 LG24_m115022-LG24_m158249   LG24_m115022   0.269*
## 23    LG2_m27444-LG2_m221845     LG2_m23665    0.781
## 24 LG13_m110230-LG13_m152483    LG13_m95830   0.589*
## 25   LG14_m187879-LG14_m2633    LG14_m33308    0.837
## 26    LG16_m98366-LG16_m1644   LG16_m256179    0.639
## 27 LG19_m195831-LG19_m176482    LG19_m62803   0.461*
## 28  LG20_m66136-LG20_m233715   LG20_m249499    0.48*
## 29 LG13_m239247-LG13_m152483    LG13_m95830    0.734
## 30    LG16_m98366-LG16_m1644    LG16_m85999    0.833
## 31  LG19_m222416-LG19_m38508   LG19_m164757   0.532*
## 32 LG13_m131941-LG13_m152483    LG13_m95830    0.882
## 33   LG14_m187879-LG14_m3973   LG14_m232116    0.679
## 34   LG16_m256633-LG16_m1644    LG16_m85999    0.838
## 35  LG19_m195831-LG19_m38508   LG19_m164757   0.532*
## 36   LG4_m158167-LG4_m179103    LG4_m158167    0.703
## 37 LG25_m148704-LG25_m118219    LG25_m42909    0.871
## 38   LG19_m234493-LG19_m3026    LG19_m62803    0.859
## 39 LG25_m148704-LG25_m196841    LG25_m26868   0.333*
## 40    LG6_m249118-LG6_m34544     LG6_m51714   0.562*
## 41 LG13_m131941-LG13_m116284   LG13_m129765    0.814
## 42  LG17_m26429-LG17_m186932   LG17_m181813    0.38*
## 43  LG22_m204703-LG22_m55365     LG22_m3697     0.93
## 44 LG25_m148704-LG25_m198798    LG25_m26868  0.051**
## 45 LG25_m148704-LG25_m198798    LG25_m26868   0.158*
## 46  LG21_m78042-LG21_m103080   LG21_m168769    0.809
## 47    LG6_m102306-LG6_m34544     LG6_m51714   0.457*
## 48     LG9_m74935-LG9_m61458    LG9_m235200      0.9
## 49 LG13_m131941-LG13_m152483   LG13_m129765     0.71
## 50  LG17_m66485-LG17_m186932   LG17_m181813    0.856
## 51 LG20_m213579-LG20_m112128    LG20_m66136    0.832
## 52 LG22_m102393-LG22_m213924   LG22_m159568    0.897
## 53  LG25_m86542-LG25_m152494    LG25_m26868 0.007***
## 54 LG25_m148704-LG25_m108773   LG25_m121157   0.189*
## 55    LG6_m201323-LG6_m34544     LG6_m51714   0.158*
## 56 LG13_m131941-LG13_m116284    LG13_m32070    0.929
## 57 LG19_m195831-LG19_m251298   LG19_m234493   0.345*
## 58  LG25_m148704-LG25_m83002   LG25_m153207    0.749
## 59    LG6_m162041-LG6_m51714    LG6_m201323   0.514*

QTL pairs summary

# Two-dimensional scan summaries at alpha = 0.2 (with p-values), one table per
# phenotype, stacked into a single data frame with a leading Trait column.

# normal
c.thr1 <- list()
for (i in seq_len(n)) {
  thr1 <- summary(scan2, perms = scan2perm, alpha = 0.2, lodcolumn = i, pvalues = TRUE)
  c.thr1[[phenames(data)[i]]] <- thr1
}
for (i in seq_along(c.thr1)) {
  has_rows <- nrow(c.thr1[[i]]) > 0
  # Prepend the Trait column only to non-empty tables that lack it.
  if (has_rows && colnames(c.thr1[[i]])[1] != "Trait") {
    c.thr1[[i]] <- cbind.data.frame(Trait = names(c.thr1[i]), c.thr1[[i]])
  }
}
thr1df <- do.call(rbind.data.frame, c(c.thr1, make.row.names = FALSE))

# binary
c.thr2 <- list()
for (i in 1:2) {
  thr2 <- summary(scan2.bin, perms = scan2perm.bin, alpha = 0.2, lodcolumn = i, pvalues = TRUE)
  c.thr2[[phenames(data)[i + n]]] <- thr2
}
for (i in seq_along(c.thr2)) {
  has_rows <- nrow(c.thr2[[i]]) > 0
  if (has_rows && colnames(c.thr2[[i]])[1] != "Trait") {
    c.thr2[[i]] <- cbind.data.frame(Trait = names(c.thr2[i]), c.thr2[[i]])
  }
}
thr2df <- do.call(rbind.data.frame, c(c.thr2, make.row.names = FALSE))

thr <- rbind(thr1df, thr2df)

QTL pairs

#normal
# Recompute genotype probabilities (second argument 2, Kosambi map function).
data<-calc.genoprob(data,2,map.function="kosambi")
# Single-QTL peaks per phenotype at alpha = 0.63, with Bayes intervals.
qtlist<-summary(scan1,perms=scan1perm,format="tabByCol",alpha=0.63,ci.function="bayesint")
# scantwo permutation thresholds at alpha = 0.2.
sc2thr1<-summary(scan2perm,alpha=0.2)
#rearrange the threshold list
# Transpose each of the first five elements of sc2thr1 and bind them
# column-wise: one row per phenotype, one column per threshold type
# (see the dimnames assignment below).
th<-vector('list',5)
for(j in 1:5){th[[j]]<-t(sc2thr1[[j]])}
m<-do.call('cbind',th)
dimnames(m)<-list(phenames(data)[1:n],names(sc2thr1)[1:5])
out.ap<-list();qtlpairs<-list();s.fq<-list();out.fq<-list()
for(i in 1:n){
  p<-phenames(data)[i]
  # Only phenotypes with at least one peak in qtlist are processed.
  if(length(qtlist[[p]][,1])>0){
    # Scan for an additional pair of QTL on the chromosomes that carry a peak,
    # given the QTL object rqtl[[p]].
    # NOTE(review): rqtl is not created in the code shown here — confirm.
    out.ap[[p]]<-addpair(data,qtlist[[p]][,1],p,rqtl[[p]],method="hk",verbose=T)
    # Summarise the pair scan against this phenotype's row of thresholds.
    qtlpairs[[p]]<-summary(out.ap[[p]],thresholds=m[p,])
    # Fit the multiple-QTL model and keep both the fit and its summary.
    s.fq[[p]]<-summary(out.fq[[p]]<- fitqtl(data,p,rqtl[[p]],method="hk",get.ests=T))
  }
}
## Warning in addpair(data, qtlist[[p]][, 1], p, rqtl[[p]], method = "hk", : Dropping 31 individuals with missing phenotypes.

## Warning in fitqtlengine(pheno = pheno, qtl = qtl, covar = covar, formula = formula, : Dropping 31 individuals with missing phenotypes.

## Warning in addpair(data, qtlist[[p]][, 1], p, rqtl[[p]], method = "hk", : Dropping 31 individuals with missing phenotypes.

## Warning in fitqtlengine(pheno = pheno, qtl = qtl, covar = covar, formula = formula, : Dropping 31 individuals with missing phenotypes.

## Warning in addpair(data, qtlist[[p]][, 1], p, rqtl[[p]], method = "hk", : Dropping 31 individuals with missing phenotypes.

## Warning in fitqtlengine(pheno = pheno, qtl = qtl, covar = covar, formula = formula, : Dropping 31 individuals with missing phenotypes.
#binary
# NOTE(review): genotype probabilities are recomputed with second argument 10
# here, versus 2 for the normal traits — presumably a deliberate coarser grid;
# confirm.
data<-calc.genoprob(data,10,map.function="kosambi")
qtlist.bin<-summary(scan1.bin,perms=scan1perm.bin,format="tabByCol",alpha=0.63,ci.function="bayesint")
sc2thr2<-summary(scan2perm.bin,alpha=0.2)
#rearrange the threshold list
# Same layout as for the normal traits: rows = the two binary phenotypes,
# columns = the first five threshold types.
th<-vector('list',5)
for(j in 1:5){th[[j]]<-t(sc2thr2[[j]])}
m.bin<-do.call('cbind',th)
dimnames(m.bin)<-list(phenames(data)[n+1:2],names(sc2thr2)[1:5])
out.ap.bin<-list();rqtl2.bin<-list()
for(i in 1:2){
  p<-phenames(data)[i+n]
  if(length(qtlist.bin[[p]][,1])>0){
    # Primary peaks (interval columns 3 and 4 dropped) stacked with the
    # additional-QTL peaks stored in s.aq[[p]].
    q<-rbind(qtlist.bin[[p]][,-c(3,4)],s.aq[[p]])
    # Build a QTL object from those positions and refine it under the binary model.
    rqtl2.bin[[p]]<-refineqtl(data,p,makeqtl(data,q[,1],q[,2],what="prob"),maxit.fitqtl=1e+6,tol=0.05,method="hk",model="binary")
    out.ap.bin[[p]]<-addpair(data,q[,1],p,rqtl2.bin[[p]],maxit=1e+6,tol=0.2,method="hk",model="binary",verbose=T)
    # Results are appended to the same qtlpairs / s.fq / out.fq lists used for
    # the normal traits.
    qtlpairs[[p]]<-summary(out.ap.bin[[p]],thresholds=m.bin[p,])
    s.fq[[p]]<-summary(out.fq[[p]]<- fitqtl(data,p,rqtl2.bin[[p]],maxit=1e+6,tol=0.01,method="hk",model="binary",get.ests=T))
  }
}
## Warning in matchchr(chr, names(cross$geno)): Dropping duplicate chromosomes

## Warning in addpair(data, q[, 1], p, rqtl2.bin[[p]], maxit = 1e+06, tol = 0.2, : Dropping 31 individuals with missing phenotypes.

## Warning in fitqtlengine(pheno = pheno, qtl = qtl.obj, covar = covar, formula =
## formula, : Didn't converge.

## Warning in fitqtlengine(pheno = pheno, qtl = qtl, covar = covar, formula = formula, : Dropping 31 individuals with missing phenotypes.

## Warning in matchchr(chr, names(cross$geno)): Dropping duplicate chromosomes
# Prepend a Trait column to every non-empty pair summary that lacks one, then
# stack all summaries into one data frame.
for (i in 1:length(qtlpairs)){
if(nrow(qtlpairs[[i]])>0 && names(qtlpairs[[i]])[1]!="Trait"){
  qtlpairs[[i]]<-cbind.data.frame(Trait=names(qtlpairs[i]),qtlpairs[[i]])
  }
}
qtlpairsdf<-do.call(rbind.data.frame,c(qtlpairs,make.row.names=F))
##interacting QTL detected for AFL
# Attach the thresholds for the trait(s) in the first column.
# NOTE(review): c() on a data frame returns a plain list, so qtlpairsdf is no
# longer a data frame after this line; also, `m` only has rows for the normal
# traits, so a binary trait in column 1 would fail the lookup — confirm that
# cbind() was not intended.
qtlpairsdf<-c(qtlpairsdf,thr=m[qtlpairsdf[,1],])

One interactive QTL pair was found in the add pair scan for AFL phenotype.

Linkage Map view

# alpha level used to select the QTL drawn on the linkage maps.
alp<-0.63
# Eight-colour "Set1" palette used to colour the QTL.
colorlist<-RColorBrewer::brewer.pal(8,"Set1")

Genetic map to pdf

# Return an empty QTL table to be filled row by row and handed to the linkage
# map plot: character columns chr, qtl and col, numeric columns so, si, ei, eo.
qtldf_initial <- function() {
  data.frame(
    chr = character(),
    qtl = character(),
    so = numeric(),
    si = numeric(),
    ei = numeric(),
    eo = numeric(),
    col = character(),
    stringsAsFactors = FALSE
  )
}
# Plot the full genetic map to a PDF. The argument list is kept in `setting`
# so that the later QTL maps can reuse and modify it.
outfile <- file.path("results/basil_linkage_map.pdf")
main <- "Basil Genetic Map"
qtldf <- qtldf_initial()

setting <- list(
  mapthis = data,
  outfile = outfile,
  main = main,
  ruler = TRUE,
  maxnbrcolsfordups = 2,
  dupnbr = TRUE,
  lg.col = "lightblue1",
  lgw = 0.15,
  labdist = 0.15,
  lgperrow = 3
)
do.call(lmv.linkage.plot, setting)
## Required pdf.width = 7.11466666666667

## Required pdf.height = 51.2411333333333

## Using pdf.width = 8

## Using pdf.height = 52

Anthocyanin QTL map

# Anthocyanin QTL map: for scan1 LOD columns 1-6, take every peak that passes
# the `alp` permutation threshold, compute its Bayes interval, and add it to
# the QTL table passed to the linkage map plot.
qtldf <- qtldf_initial()
for (i in 1:6) {
  qtls <- summary(scan1, perms = scan1perm, alpha = alp, lodcolumn = i)[, c(1:2, 2 + i)]
  # seq_len() makes the loop a no-op when no peak passes the threshold.
  for (j in seq_len(nrow(qtls))) {
    bay <- bayesint(scan1[, c(1:2, 2 + i)], chr = qtls$chr[j])
    qtldf <- rbind(
      qtldf,
      data.frame(
        chr = qtls$chr[j],
        qtl = colnames(bay)[3],
        # Rows 1 and 3 of the interval are its ends; row 2 is the peak.
        so = bay$pos[1],
        si = bay$pos[2],
        ei = bay$pos[2],
        eo = bay$pos[3],
        col = colorlist[(i + 1)]
      )
    )
  }
}
outfile <- file.path("results/basil_QTLs.anthocyanin.pdf")
# Take the LG numbers from the LG NAMES ("LG13" -> 13). as.numeric() on a
# factor returns level codes, which equal the LG numbers only while every LG
# level is present and in order.
lg_ids <- as.integer(sub("^LG", "", as.character(qtldf$chr)))
mapthese <- paste0("LG", sort(unique(lg_ids)))
main <- paste0("Basil Genetic Map + QTLs for Anthocyanin (", paste0(mapthese, collapse = ","), ")")
setting <- modifyList(setting, list(outfile = outfile, mapthese = mapthese, main = main))
setting$qtldf <- qtldf
do.call(lmv.linkage.plot, setting)
## Required pdf.width = 12.865

## Required pdf.height = 14.6325333333333

## Using pdf.width = 13

## Using pdf.height = 15

Fusarium QTL map

# Fusarium QTL map: peaks of scan1 LOD columns 7-10, plus the peaks of the
# first scan1.bin phenotype (primary scan and additional-QTL scan).
qtldf <- qtldf_initial()
for (i in 7:10) {
  qtls <- summary(scan1, perms = scan1perm, alpha = alp, lodcolumn = i)[, c(1:2, 2 + i)]
  # seq_len() makes the loop a no-op when no peak passes the threshold.
  for (j in seq_len(nrow(qtls))) {
    bay <- bayesint(scan1[, c(1:2, 2 + i)], chr = qtls$chr[j])
    qtldf <- rbind(
      qtldf,
      data.frame(
        chr = qtls$chr[j],
        qtl = colnames(bay)[3],
        so = bay$pos[1],
        si = bay$pos[2],
        ei = bay$pos[2],
        eo = bay$pos[3],
        col = colorlist[(i - 5)]
      )
    )
  }
}

# First phenotype scanned in scan1.bin.
i <- 1
p <- phenames(data)[i + n]
qtls <- summary(scan1.bin, perms = scan1perm.bin, format = "tabByCol", alpha = alp, ci.function = "bayesint")
if (nrow(qtls[[p]]) > 0) {
  # Append the additional-QTL peak(s) BEFORE looping. The earlier version
  # appended inside the loop at j == 3, which only worked with exactly two
  # primary peaks: with one peak it emitted an NA row and never added the
  # additional QTL.
  extra <- s.aq[[p]]
  if (!is.null(extra) && nrow(extra) > 0) {
    extra_ci <- bayesint(out.aq.bin[[p]], extra[, 1])
    qtls[[p]] <- rbind(
      qtls[[p]],
      cbind(
        extra[, -3],
        `ci.low` = extra_ci[1, 2],
        `ci.high` = extra_ci[3, 2],
        `lod` = extra[, 3]
      )
    )
  }
  for (j in seq_len(nrow(qtls[[p]]))) {
    qtldf <- rbind(
      qtldf,
      data.frame(
        chr = qtls[[p]]$chr[j],
        qtl = p,
        so = qtls[[p]]$`ci.low`[j],
        si = qtls[[p]]$pos[j],
        ei = qtls[[p]]$pos[j],
        eo = qtls[[p]]$`ci.high`[j],
        col = colorlist[7]
      )
    )
  }
}
outfile <- file.path("results/basil_QTLs.fusarium.pdf")
# Take the LG numbers from the LG NAMES ("LG25" -> 25). as.numeric() on a
# factor returns level codes, which equal the LG numbers only while every LG
# level is present and in order.
lg_ids <- as.integer(sub("^LG", "", as.character(qtldf$chr)))
mapthese <- paste0("LG", sort(unique(lg_ids)))
main <- paste0("Basil Genetic Map + QTLs for Fusarium (", paste0(mapthese, collapse = ","), ")")
setting <- modifyList(setting, list(outfile = outfile, mapthese = mapthese, main = main))
setting$qtldf <- qtldf
do.call(lmv.linkage.plot, setting)
## Required pdf.width = 10.6533333333333

## Required pdf.height = 8.0864

## Using pdf.width = 11

## Using pdf.height = 9

Downy Mildew QTL map

# Downy mildew QTL map: peaks of the second scan1.bin phenotype (primary scan
# and additional-QTL scan). `qtls` must already hold the tabByCol summary of
# scan1.bin at alpha = alp.
qtldf <- qtldf_initial()
i <- 2
p <- phenames(data)[i + n]
if (nrow(qtls[[p]]) > 0) {
  # Append the additional-QTL peak(s) BEFORE looping. The earlier version
  # appended inside the loop at j == 3, which only worked with exactly two
  # primary peaks: with one peak it emitted an NA row and never added the
  # additional QTL.
  extra <- s.aq[[p]]
  if (!is.null(extra) && nrow(extra) > 0) {
    extra_ci <- bayesint(out.aq.bin[[p]], extra[, 1])
    qtls[[p]] <- rbind(
      qtls[[p]],
      cbind(
        extra[, -3],
        `ci.low` = extra_ci[1, 2],
        `ci.high` = extra_ci[3, 2],
        `lod` = extra[, 3]
      )
    )
  }
  for (j in seq_len(nrow(qtls[[p]]))) {
    qtldf <- rbind(
      qtldf,
      data.frame(
        chr = qtls[[p]]$chr[j],
        qtl = p,
        so = qtls[[p]]$`ci.low`[j],
        si = qtls[[p]]$pos[j],
        ei = qtls[[p]]$pos[j],
        eo = qtls[[p]]$`ci.high`[j],
        col = colorlist[7]
      )
    )
  }
}
outfile <- file.path("results/basil_QTLs.BDM.pdf")
# Take the LG numbers from the LG NAMES ("LG6" -> 6). as.numeric() on a
# factor returns level codes, which equal the LG numbers only while every LG
# level is present and in order.
lg_ids <- as.integer(sub("^LG", "", as.character(qtldf$chr)))
mapthese <- paste0("LG", sort(unique(lg_ids)))
main <- paste0("Basil Genetic Map + QTLs for Downy Mildew (", paste0(mapthese, collapse = ","), ")")
setting <- modifyList(setting, list(outfile = outfile, mapthese = mapthese, main = main))
setting$qtldf <- qtldf
do.call(lmv.linkage.plot, setting)
## Required pdf.width = 6.46766666666667

## Required pdf.height = 6.74613333333333

## Using pdf.width = 7

## Using pdf.height = 7

The pdf files are in the results folder.

Some linkage groups are shared by the phenotypes within each group. For example, the Fusarium QTL are mainly on LG25.

The anthocyanin QTL are mainly on LG19, where there is also a BDM resistance locus. This is an interesting finding. It is worth checking the markers at this position, aligning them to the genome and finding out what this locus is responsible for, and whether there is a connection between anthocyanin and downy mildew resistance. The phenotype correlations showed no such connection, though.

Thanks for reading my project, please share your thoughts with me!