AllelicBiase_expressionLevel.bsh

// ###### use mm10 reference genome mapping to calculate expression level ###
// ### examine the distribution of the ratio (non biased organ with highest expression level rpkm)/ biased organ rpkm
// # 1. identify organ specific domains
// # 2. calculate rpkm of domains from all organs
// # 3. calculate the ratio (non biased organ with highest expression level rpkm)/ biased organ rpkm
// # 4. make histogram


// # 1. identify organ specific domains
// cd /workdir/sc2457/F1_Tissues/ImprintingOrGenetics/Combined_MB6andPB6/AllelicBiase_expressionLevel_domain
// f=T8_2Strand_p0.05_effect_strain.bed_cluster
// grep -v , ../${f} | cut -f 3- > ${f}_organSpecific

// # 2. calculate rpkm of domains from all organs
// # 3. calculate the ratio (non biased organ with highest expression level rpkm)/ biased organ rpkm
// # 4. make histogram
// ln -s /workdir/sc2457/F1_Tissues/bigWig/*_all_plus.bw .
// ln -s /workdir/sc2457/F1_Tissues/bigWig/*_all_minus.bw . #*/

// R --vanilla --slave --args $(pwd) T8_2Strand_p0.05_effect_strain.bed_cluster_organSpecific < getCounts_bed_NoStrandness.R


// ###### use diploid genome mapping to calculate expression level ###
// ### examine the distribution of the ratio (non biased organ with highest expression level rpkm)/ biased organ rpkm
// # 1. identify the organ specific domains
// # 2. calculate rpkm of the block from all organs
// # 3. calculate the ratio (non biased organ with highest expression level rpkm)/ biased organ rpkm
// # 4. make histogram
// ###
// wait_a_second() {
//   joblist=($(jobs -p))
//     while (( ${#joblist[*]} >= 10 ))
//       do
//       sleep 1
//       joblist=($(jobs -p))
//   done
// }

// PL=/workdir/sc2457/alleleDB/alleledb_pipeline_mouse
// MAPS=/workdir/sc2457/mouse_AlleleSpecific/mouse_genome.sanger.ac.uk/REL-1505-SNPs_Indels/PersonalGenome_P.CAST_M.B6_indelsNsnps_CAST.bam/%s_P.CAST.EiJ_M.C57BL.6J.map
// FDR_SIMS=10
// FDR_CUTOFF=0.1

// BinomialTest_expression(){
//   # BinomialTest $f $j $MAT_READ_BED $PAT_READ_BED
//   f=$1
//   j=$2
//   MAT_READ_BED=$3
//   PAT_READ_BED=$4
//   IDENTICAL_READ_BED=$5

//   bedtools coverage -s -a $f -b ${MAT_READ_BED} -sorted| awk 'BEGIN {OFS="\t"; t="_"} {print $1t$2t$3, $1,$2,$3,$7,$8,$9,$10}' |LC_ALL=C sort -k1,1V -k2,2n > ${j}.mat_cov.bed &
//   bedtools coverage -s -a $f -b ${PAT_READ_BED} -sorted| awk 'BEGIN {OFS="\t"; t="_"} {print $1t$2t$3, $1,$2,$3,$7,$8,$9,$10}' |LC_ALL=C sort -k1,1V -k2,2n > ${j}.pat_cov.bed &
//   bedtools coverage -s -a $f -b ${IDENTICAL_READ_BED} -sorted| awk 'BEGIN {OFS="\t"; t="_"} {print $1t$2t$3, $1,$2,$3,$7,$8,$9,$10}' |LC_ALL=C sort -k1,1V -k2,2n > ${j}.iden_cov.bed &
//   wait

//   # join mat, pat, iden read count 
//   join -t $'\t' -j 1 -o 1.1,1.2,1.3,1.4,1.5,2.5 ${j}.mat_cov.bed ${j}.pat_cov.bed > ${j}.temp_cov.bed
//   join -t $'\t' -j 1 -o 1.1,1.2,1.3,1.4,1.5,1.6,2.5 ${j}.temp_cov.bed ${j}.iden_cov.bed |\
//   awk 'BEGIN{OFS="\t"; t=","} ($5>$6) {print $2, $3, $4, "M"t$5t$6, $5, $6, $7 }  ($5<$6) {print $2, $3, $4, "P"t$5t$6, $5, $6, $7}  ($5==$6) {print $2, $3, $4, "S"t$5t$6, $5, $6, $7} '  \
//   | LC_ALL=C sort -k1,1V -k2,2n --parallel=30 > ${j}.merged_cov.bed

//   mv ${j}.temp_cov.bed ${j}.mat_cov.bed ${j}.pat_cov.bed ${j}.iden_cov.bed toremove
//   # output of BinomialTestFor_merged_cov.bed.py:(hmm+BinomialTest) if p-value <= 0.05, remain what it got from hmm (can ne M,P, or S), otherwise S.
//   python ${PL}/BinomialTestFor_merged_cov.bed.py ${j}.merged_cov.bed ${j}_binomtest.bed
//   mv ${j}.merged_cov.bed toremove

//   python ${PL}/FalsePosFor_merged_cov.bed.py ${j}_binomtest.bed ${FDR_SIMS} ${FDR_CUTOFF} > ${j}_binomtest_FDR${FDR_CUTOFF}.txt &
//   #  awk 'NR==1 { print $0 } NR>1 && ($9+0) <= thresh { print $0 }'  thresh=$(awk 'END {print $6}' ${j}_binomtest_FDR${FDR_CUTOFF}.txt) < ${j}_binomtest.bed  > ${j}_interestingHets.bed
// }


// cd /workdir/sc2457/F1_Tissues/ImprintingOrGenetics/Combined_MB6andPB6/AllelicBiase_expressionLevel_diploidGenome_domain
// ln -s /workdir/sc2457/F1_Tissues/3rd_batch/map2ref/map2ref_bed/ .

// # 1. identify organ specific domains
// f=T8_2Strand_p0.05_effect_strain.bed_cluster
// grep -v , ../${f} | cut -f 3- |LC_ALL=C sort -k1,1V -k2,2n > ${f}_organSpecific
// domain=${f}_organSpecific

// # 2. calculate rpkm of domains from all organs
// cat ${domain} |awk 'BEGIN{OFS="\t"} {print $0, ".", "+"}' > ${domain}_plus
// cat ${domain} |awk 'BEGIN{OFS="\t"} {print $0, ".", "-"}' > ${domain}_minus


// ## map reads from each sample to the organ specific domains
// for Head in BN HT  SK  SP  KD  LV  GI  ST
// do 
// echo $Head
// bed_dir=map2ref_bed
//   for f in ${bed_dir}/${Head}_MB6_*_R1.mat.bowtie.gz_AMBremoved_sorted_identical.map2ref.sorted.bed.gz  #each samples from MB6 of the same tissue
//     do PREFIX=`echo $f|cut -d . -f 1`
//     echo $PREFIX
//     MAT_READ_BED=${PREFIX}.mat.bowtie.gz_AMBremoved_sorted_specific.map2ref.sorted.bed.gz
//     PAT_READ_BED=${PREFIX}.pat.bowtie.gz_AMBremoved_sorted_specific.map2ref.sorted.bed.gz
//     IDENTICAL_READ_BED=${PREFIX}.mat.bowtie.gz_AMBremoved_sorted_identical.map2ref.sorted.bed.gz

//     P=`echo $PREFIX| cut -d / -f 2`
//     rm ${P}_domain_plus.bed ${P}_domain_minus.bed
//     ln -s ${domain}_plus ${P}_domain_plus.bed
//     ln -s ${domain}_minus ${P}_domain_minus.bed
//     BinomialTest_expression ${P}_domain_plus.bed ${P}_domain_plus ${MAT_READ_BED} ${PAT_READ_BED} ${IDENTICAL_READ_BED} &
//     BinomialTest_expression ${P}_domain_minus.bed ${P}_domain_minus ${MAT_READ_BED} ${PAT_READ_BED} ${IDENTICAL_READ_BED} & 
//     wait_a_second
//   done

//   for f in ${bed_dir}/${Head}_PB6_*_R1.mat.bowtie.gz_AMBremoved_sorted_identical.map2ref.sorted.bed.gz  #each samples from PB6 of the same tissue
//     do PREFIX=`echo $f|cut -d . -f 1`
//     echo $PREFIX
//     # switch -m and -p for PB6
//     PAT_READ_BED=${PREFIX}.mat.bowtie.gz_AMBremoved_sorted_specific.map2ref.sorted.bed.gz
//     MAT_READ_BED=${PREFIX}.pat.bowtie.gz_AMBremoved_sorted_specific.map2ref.sorted.bed.gz
//     IDENTICAL_READ_BED=${PREFIX}.mat.bowtie.gz_AMBremoved_sorted_identical.map2ref.sorted.bed.gz

//     P=`echo $PREFIX| cut -d / -f 2`
//     rm ${P}_domain_plus.bed ${P}_domain_minus.bed
//     ln -s ${domain}_plus ${P}_domain_plus.bed
//     ln -s ${domain}_minus ${P}_domain_minus.bed

//     BinomialTest_expression ${P}_domain_plus.bed ${P}_domain_plus ${MAT_READ_BED} ${PAT_READ_BED} ${IDENTICAL_READ_BED} &
//     BinomialTest_expression ${P}_domain_minus.bed ${P}_domain_minus ${MAT_READ_BED} ${PAT_READ_BED} ${IDENTICAL_READ_BED} & 
//     wait_a_second
//   done
// done

// ## make a table of expression level in counts
// for Head in BN HT  SK  SP  KD  LV  GI  ST
//    do for cross in MB6 PB6
//       do for s in plus minus
//          do echo -e 'chrm\tchrmStart\tchrmEnd\t'${Head}_${cross}_${s} > ${Head}_${cross}_${s}_readcounts.txt
//             cat ${Head}_${cross}_all_R1_domain_${s}_binomtest.bed | awk 'BEGIN {OFS="\t"; t="_"} NR>1 {print $1,$2,$3, $6+$7+$8}' >> ${Head}_${cross}_${s}_readcounts.txt
// done
// done
// done


// # combine counts from plus amd minus strand
// # combine counts from PB6 and MB6
// # 3. calculate the ratio (non biased organ with highest expression level rpkm)/ biased organ rpkm
// # 4. make histogram
// Rscript getCounts_combine_multiple_txtfiles.R


###### use diploid genome mapping to calculate expression level ###
### examine the distribution of the ratio (non biased organ with highest expression level rpkm)/ biased organ rpkm
# 1. identify blocks within the organ specific domains
# 2. calculate rpkm of the block from all organs
# 3. calculate the ratio (non biased organ with highest expression level rpkm)/ biased organ rpkm
# 4. make histogram
###
wait_a_second() {
  joblist=($(jobs -p))
    while (( ${#joblist[*]} >= 10 ))
      do
      sleep 1
      joblist=($(jobs -p))
  done
}

PL=/workdir/sc2457/alleleDB/alleledb_pipeline_mouse
MAPS=/workdir/sc2457/mouse_AlleleSpecific/mouse_genome.sanger.ac.uk/REL-1505-SNPs_Indels/PersonalGenome_P.CAST_M.B6_indelsNsnps_CAST.bam/%s_P.CAST.EiJ_M.C57BL.6J.map
FDR_SIMS=10
FDR_CUTOFF=0.1

BinomialTest_expression(){
  # BinomialTest $f $j $MAT_READ_BED $PAT_READ_BED
  f=$1
  j=$2
  MAT_READ_BED=$3
  PAT_READ_BED=$4
  IDENTICAL_READ_BED=$5

  bedtools coverage -s -a $f -b ${MAT_READ_BED} -sorted| awk 'BEGIN {OFS="\t"; t="_"} {print $1t$2t$3, $1,$2,$3,$7,$8,$9,$10}' |LC_ALL=C sort -k1,1V -k2,2n > ${j}.mat_cov.bed &
  bedtools coverage -s -a $f -b ${PAT_READ_BED} -sorted| awk 'BEGIN {OFS="\t"; t="_"} {print $1t$2t$3, $1,$2,$3,$7,$8,$9,$10}' |LC_ALL=C sort -k1,1V -k2,2n > ${j}.pat_cov.bed &
  bedtools coverage -s -a $f -b ${IDENTICAL_READ_BED} -sorted| awk 'BEGIN {OFS="\t"; t="_"} {print $1t$2t$3, $1,$2,$3,$7,$8,$9,$10}' |LC_ALL=C sort -k1,1V -k2,2n > ${j}.iden_cov.bed &
  wait

  # join mat, pat, iden read count 
  join -t $'\t' -j 1 -o 1.1,1.2,1.3,1.4,1.5,2.5 ${j}.mat_cov.bed ${j}.pat_cov.bed > ${j}.temp_cov.bed
  join -t $'\t' -j 1 -o 1.1,1.2,1.3,1.4,1.5,1.6,2.5 ${j}.temp_cov.bed ${j}.iden_cov.bed |\
  awk 'BEGIN{OFS="\t"; t=","} ($5>$6) {print $2, $3, $4, "M"t$5t$6, $5, $6, $7 }  ($5<$6) {print $2, $3, $4, "P"t$5t$6, $5, $6, $7}  ($5==$6) {print $2, $3, $4, "S"t$5t$6, $5, $6, $7} '  \
  | LC_ALL=C sort -k1,1V -k2,2n --parallel=30 > ${j}.merged_cov.bed

  mv ${j}.temp_cov.bed ${j}.mat_cov.bed ${j}.pat_cov.bed ${j}.iden_cov.bed toremove
  # output of BinomialTestFor_merged_cov.bed.py:(hmm+BinomialTest) if p-value <= 0.05, remain what it got from hmm (can ne M,P, or S), otherwise S.
  python ${PL}/BinomialTestFor_merged_cov.bed.py ${j}.merged_cov.bed ${j}_binomtest.bed
  mv ${j}.merged_cov.bed toremove

  python ${PL}/FalsePosFor_merged_cov.bed.py ${j}_binomtest.bed ${FDR_SIMS} ${FDR_CUTOFF} > ${j}_binomtest_FDR${FDR_CUTOFF}.txt &
  #  awk 'NR==1 { print $0 } NR>1 && ($9+0) <= thresh { print $0 }'  thresh=$(awk 'END {print $6}' ${j}_binomtest_FDR${FDR_CUTOFF}.txt) < ${j}_binomtest.bed  > ${j}_interestingHets.bed
}


# 1. identify blocks within the organ specific domains (osdBlock)
cd /workdir/sc2457/F1_Tissues/ImprintingOrGenetics/Combined_MB6andPB6
# pool blocks together
tmp_bed=T8_2Strand_p0.05_effect_strain_withStrandness_temp.bed
rm ${tmp_bed}
  for head in BN SP HT SK KD ST GI LV
    do f=${head}_plus_p0.05_effect_strain.bed
    cat ${head}_plus_p0.05_effect_strain.bed |awk -v h=$head 'BEGIN {OFS="\t"}{print $1,$2,$3, h, ".", "+"}'>> ${tmp_bed}
    cat ${head}_minus_p0.05_effect_strain.bed |awk -v h=$head 'BEGIN {OFS="\t"}{print $1,$2,$3, h, ".", "-"}'>> ${tmp_bed}
  done
  sort-bed ${tmp_bed} > T8_2Strand_p0.05_effect_strain_withStrandness.bed

cd /workdir/sc2457/F1_Tissues/ImprintingOrGenetics/Combined_MB6andPB6/AllelicBiase_expressionLevel_diploidGenome_block
ln -s /workdir/sc2457/F1_Tissues/3rd_batch/map2ref/map2ref_bed/ .

#f=T8_2Strand_p0.05_effect_strain.bed_cluster
#grep -v , ../${f} | cut -f 3- |LC_ALL=C sort -k1,1V -k2,2n > ../${f}_organSpecific

intersectBed -u -a ../T8_2Strand_p0.05_effect_strain_withStrandness.bed -b ../T8_2Strand_p0.05_effect_strain.bed_cluster_organSpecific > T8_2Strand_p0.05_effect_strain_withStrandness.bed_organSpecific

# merge overlapped, blocks in organ-specific domain, keep strandness
# not using this for calculation. Just to check if a organ-specific-biased can be biased on both plus and minus strand? YES!
bedtools merge -i T8_2Strand_p0.05_effect_strain_withStrandness.bed_organSpecific  -c 4,6 -o distinct -d 0 |awk 'BEGIN {OFS="\t";m=":";d="-"}{print ($3-$2)/1000000, $1m$2d$3, $0 }' > T8_2Strand_p0.05_effect_strain_withStrandness.bed_organSpecific_cluster  

# use blocks within organ-specific-biased domains
osdBlock=T8_2Strand_p0.05_effect_strain_withStrandness.bed_organSpecific

# 2. calculate rpkm (Reads per kilo base per million mapped reads) of osdBlock blocks in organ specific domain from all organs
# need to seperate to plus and minus strands because BinomialTest_expression can only haddle one strand at a time
grep + ${osdBlock} |LC_ALL=C sort -k1,1V -k2,2n > ${osdBlock}_plus
grep - ${osdBlock} |LC_ALL=C sort -k1,1V -k2,2n> ${osdBlock}_minus


## map reads from each sample to the osdBlock
mkdir toremove
for Head in BN HT  SK  SP  KD  LV  GI  ST
do 
echo $Head
bed_dir=map2ref_bed
  for f in ${bed_dir}/${Head}_MB6_*_R1.mat.bowtie.gz_AMBremoved_sorted_identical.map2ref.sorted.bed.gz  #each samples from MB6 of the same tissue
    do PREFIX=`echo $f|cut -d . -f 1`
    echo $PREFIX
    MAT_READ_BED=${PREFIX}.mat.bowtie.gz_AMBremoved_sorted_specific.map2ref.sorted.bed.gz
    PAT_READ_BED=${PREFIX}.pat.bowtie.gz_AMBremoved_sorted_specific.map2ref.sorted.bed.gz
    IDENTICAL_READ_BED=${PREFIX}.mat.bowtie.gz_AMBremoved_sorted_identical.map2ref.sorted.bed.gz

    P=`echo $PREFIX| cut -d / -f 2`
    rm ${P}_osdBlock_plus.bed ${P}_osdBlock_minus.bed
    ln -s ${osdBlock}_plus ${P}_osdBlock_plus.bed
    ln -s ${osdBlock}_minus ${P}_osdBlock_minus.bed
    BinomialTest_expression ${P}_osdBlock_plus.bed ${P}_osdBlock_plus ${MAT_READ_BED} ${PAT_READ_BED} ${IDENTICAL_READ_BED} &
    BinomialTest_expression ${P}_osdBlock_minus.bed ${P}_osdBlock_minus ${MAT_READ_BED} ${PAT_READ_BED} ${IDENTICAL_READ_BED} & 
    wait_a_second
  done

  for f in ${bed_dir}/${Head}_PB6_*_R1.mat.bowtie.gz_AMBremoved_sorted_identical.map2ref.sorted.bed.gz  #each samples from PB6 of the same tissue
    do PREFIX=`echo $f|cut -d . -f 1`
    echo $PREFIX
    # switch -m and -p for PB6
    PAT_READ_BED=${PREFIX}.mat.bowtie.gz_AMBremoved_sorted_specific.map2ref.sorted.bed.gz
    MAT_READ_BED=${PREFIX}.pat.bowtie.gz_AMBremoved_sorted_specific.map2ref.sorted.bed.gz
    IDENTICAL_READ_BED=${PREFIX}.mat.bowtie.gz_AMBremoved_sorted_identical.map2ref.sorted.bed.gz

    P=`echo $PREFIX| cut -d / -f 2`
    rm ${P}_osdBlock_plus.bed ${P}_osdBlock_minus.bed
    ln -s ${osdBlock}_plus ${P}_osdBlock_plus.bed
    ln -s ${osdBlock}_minus ${P}_osdBlock_minus.bed

    BinomialTest_expression ${P}_osdBlock_plus.bed ${P}_osdBlock_plus ${MAT_READ_BED} ${PAT_READ_BED} ${IDENTICAL_READ_BED} &
    BinomialTest_expression ${P}_osdBlock_minus.bed ${P}_osdBlock_minus ${MAT_READ_BED} ${PAT_READ_BED} ${IDENTICAL_READ_BED} & 
    wait_a_second
  done
done

## make a table of expression level in counts
for Head in BN HT  SK  SP  KD  LV  GI  ST
   do for cross in MB6 PB6
         do echo -e 'chrm\tchrmStart\tchrmEnd\tchrStrand\t'${Head}_${cross} > ${Head}_${cross}_readcounts.txt
            cat ${Head}_${cross}_all_R1_osdBlock_plus_binomtest.bed| awk 'BEGIN {OFS="\t"; t="_"} NR>1 {print $1,$2,$3, "+", $6+$7+$8}' >> ${Head}_${cross}_readcounts.txt
            cat ${Head}_${cross}_all_R1_osdBlock_minus_binomtest.bed| awk 'BEGIN {OFS="\t"; t="_"} NR>1 {print $1,$2,$3, "-", $6+$7+$8}' >> ${Head}_${cross}_readcounts.txt
done
done


# combine counts from plus amd minus strand
# combine counts from PB6 and MB6
# 3. calculate the ratio (non biased organ with highest expression level rpkm)/ biased organ rpkm
# 4. make histogram
# Rscript getCounts_combine_multiple_txtfiles.R
Rscript getNonBiasedHighest_Biased_TotalReadCountRatio.R


### Are those expressed really not biased??? Or we are too strigent?
# combine allele-specific reads from MB6 and PB6 to perform binomial test

cd /workdir/sc2457/F1_Tissues/ImprintingOrGenetics/Combined_MB6andPB6/AllelicBiase_expressionLevel_diploidGenome_block/MisB6_PisCAST
ln -s /workdir/sc2457/F1_Tissues/3rd_batch/map2ref/map2ref_bed/ .
ln -s ../T8_2Strand_p0.05_effect_strain_withStrandness.bed_organSpecific .
osdBlock=T8_2Strand_p0.05_effect_strain_withStrandness.bed_organSpecific  #blocks within the organ specific domains (osdBlock)
mkdir toremove

PL=/workdir/sc2457/alleleDB/alleledb_pipeline_mouse
MAPS=/workdir/sc2457/mouse_AlleleSpecific/mouse_genome.sanger.ac.uk/REL-1505-SNPs_Indels/PersonalGenome_P.CAST_M.B6_indelsNsnps_CAST.bam/%s_P.CAST.EiJ_M.C57BL.6J.map
FDR_SIMS=10
FDR_CUTOFF=0.1

wait_a_second() {
  joblist=($(jobs -p))
    while (( ${#joblist[*]} >= 5 ))
      do
      sleep 1
      joblist=($(jobs -p))
  done
}

BinomialTest(){
  # only use mat-specific or pat-specific reads, ignore IDENTICAL_READ_BED
  # take stradness into account but only process one strand (plus or minus) at a time
  # BinomialTest $f $j $MAT_READ_BED $PAT_READ_BED
  f=$1
  j=$2
  MAT_READ_BED=$3
  PAT_READ_BED=$4

  bedtools coverage -s -a $f -b <(zcat ${MAT_READ_BED}) -sorted| awk 'BEGIN {OFS="\t"; t="_"} {print $1t$2t$3, $1,$2,$3,$7,$8,$9,$10}' |LC_ALL=C sort -k1,1V -k2,2n > ${j}.mat_cov.bed &
  bedtools coverage -s -a $f -b <(zcat ${PAT_READ_BED}) -sorted| awk 'BEGIN {OFS="\t"; t="_"} {print $1t$2t$3, $1,$2,$3,$7,$8,$9,$10}' |LC_ALL=C sort -k1,1V -k2,2n > ${j}.pat_cov.bed &
  #bedtools coverage -s -a $f -b ${IDENTICAL_READ_BED} -sorted| awk 'BEGIN {OFS="\t"; t="_"} {print $1t$2t$3, $1,$2,$3,$7,$8,$9,$10}' |LC_ALL=C sort -k1,1V -k2,2n > ${j}.iden_cov.bed &
  wait

  # keep every blocks inclding block with at 0 allele-specific read
  join -t $'\t' -j 1 -o 1.1,1.2,1.3,1.4,1.5,2.5 ${j}.mat_cov.bed ${j}.pat_cov.bed | \
  awk 'BEGIN{OFS="\t"; t=","} ($5>$6) {print $2, $3, $4, "M"t$5t$6, $5, $6, "-" }  ($5<$6) {print $2, $3, $4, "P"t$5t$6, $5, $6, "-"}  ($5==$6) {print $2, $3, $4, "S"t$5t$6, $5, $6, "-"} '  \
  | LC_ALL=C sort -k1,1V -k2,2n --parallel=30 > ${j}.merged_cov.bed

  mv ${j}.mat_cov.bed ${j}.pat_cov.bed toremove
  # output of BinomialTestFor_merged_cov.bed.py:(hmm+BinomialTest) if p-value <= 0.05, remain what it got from hmm (can ne M,P, or S), otherwise S.
  python ${PL}/BinomialTestFor_merged_cov.bed.py ${j}.merged_cov.bed ${j}_binomtest.bed
  mv ${j}.merged_cov.bed toremove

  python ${PL}/FalsePosFor_merged_cov.bed.py ${j}_binomtest.bed ${FDR_SIMS} ${FDR_CUTOFF} > ${j}_binomtest_FDR${FDR_CUTOFF}.txt &
#  awk 'NR==1 { print $0 } NR>1 && ($9+0) <= thresh { print $0 }'  thresh=$(awk 'END {print $6}' ${j}_binomtest_FDR${FDR_CUTOFF}.txt) < ${j}_binomtest.bed  > ${j}_interestingHets.bed
}

# Pool MB6 and PB6 reads together. mat is B6 reads, pat is Cast reads
for Head in BN HT  SK  SP  KD  LV  GI  ST
do 
echo $Head
bed_dir=map2ref_bed
zcat ${bed_dir}/${Head}_MB6_all_R1.mat.bowtie.gz_AMBremoved_sorted_specific.map2ref.sorted.bed.gz ${bed_dir}/${Head}_PB6_all_R1.mat.bowtie.gz_AMBremoved_sorted_specific.map2ref.sorted.bed.gz | LC_ALL=C sort -k1,1V -k2,2n |gzip >${Head}_mat_temp.gz  &
zcat ${bed_dir}/${Head}_MB6_all_R1.pat.bowtie.gz_AMBremoved_sorted_specific.map2ref.sorted.bed.gz ${bed_dir}/${Head}_PB6_all_R1.pat.bowtie.gz_AMBremoved_sorted_specific.map2ref.sorted.bed.gz | LC_ALL=C sort -k1,1V -k2,2n |gzip >${Head}_pat_temp.gz  &
wait
done


# Perform BinomialTest one strand at a time, using pooled MB6 and PB6 reads 
for Head in BN HT  SK  SP  KD  LV  GI  ST
do 
    MAT_READ_BED=${Head}_mat_temp.gz
    PAT_READ_BED=${Head}_pat_temp.gz
    P=${Head}_PoolMB6PB6
    ln -s ${osdBlock}_plus ${P}_osdBlock_plus.bed
    ln -s ${osdBlock}_minus ${P}_osdBlock_minus.bed
    BinomialTest ${P}_osdBlock_plus.bed ${P}_osdBlock_plus ${MAT_READ_BED} ${PAT_READ_BED} &
    BinomialTest ${P}_osdBlock_minus.bed ${P}_osdBlock_minus ${MAT_READ_BED} ${PAT_READ_BED} & 
    wait_a_second
done


### examine the p-value of the non-biased organ with highest expression in the organ specific domains
## use pool reads p-value
# make a table of p-value
for Head in BN HT  SK  SP  KD  LV  GI  ST
         do echo -e 'chrm\tchrmStart\tchrmEnd\tchrStrand\t'${Head} > ${Head}_pValue.txt
            cat ${Head}_PoolMB6PB6_osdBlock_plus_binomtest.bed| awk 'BEGIN {OFS="\t"; t="_"} NR>1 {print $1,$2,$3, "+", $9}' >> ${Head}_pValue.txt
            cat ${Head}_PoolMB6PB6_osdBlock_minus_binomtest.bed| awk 'BEGIN {OFS="\t"; t="_"} NR>1 {print $1,$2,$3, "-", $9}' >> ${Head}_pValue.txt
done

# make a table of allele-specifc reads chrm chrmStart chrmEnd chrStrand ${Head}.win ${Head}.mat ${Head}.pat
for Head in BN HT  SK  SP  KD  LV  GI  ST
         do echo -e 'chrm\tchrmStart\tchrmEnd\tchrStrand\t'${Head}'.win\t'${Head}'.mat\t'${Head}'.pat' > ${Head}_AlleleSpecificReads.txt
            cat ${Head}_PoolMB6PB6_osdBlock_plus_binomtest.bed| awk 'BEGIN {OFS="\t"; t="_"} NR>1 {print $1,$2,$3, "+", substr($4,1,1), $6, $7}' >> ${Head}_AlleleSpecificReads.txt
            cat ${Head}_PoolMB6PB6_osdBlock_minus_binomtest.bed| awk 'BEGIN {OFS="\t"; t="_"} NR>1 {print $1,$2,$3, "-", substr($4,1,1), $6, $7}' >> ${Head}_AlleleSpecificReads.txt
done


Rscript getNonBiasedHighest_Biased_AllelicBiaseDistribution.R