-
Notifications
You must be signed in to change notification settings - Fork 0
/
AllelicBiase_expressionLevel.bsh
408 lines (311 loc) · 19.9 KB
/
AllelicBiase_expressionLevel.bsh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
// ###### use mm10 reference genome mapping to calculate expression level ###
// ### examine the distribution of the ratio (non biased organ with highest expression level rpkm)/ biased organ rpkm
// # 1. identify organ specific domains
// # 2. calculate rpkm of domains from all organs
// # 3. calculate the ratio (non biased organ with highest expression level rpkm)/ biased organ rpkm
// # 4. make histogram
// # 1. identify organ specific domains
// cd /workdir/sc2457/F1_Tissues/ImprintingOrGenetics/Combined_MB6andPB6/AllelicBiase_expressionLevel_domain
// f=T8_2Strand_p0.05_effect_strain.bed_cluster
// grep -v , ../${f} | cut -f 3- > ${f}_organSpecific
// # 2. calculate rpkm of domains from all organs
// # 3. calculate the ratio (non biased organ with highest expression level rpkm)/ biased organ rpkm
// # 4. make histogram
// ln -s /workdir/sc2457/F1_Tissues/bigWig/*_all_plus.bw .
// ln -s /workdir/sc2457/F1_Tissues/bigWig/*_all_minus.bw . #*/
// R --vanilla --slave --args $(pwd) T8_2Strand_p0.05_effect_strain.bed_cluster_organSpecific < getCounts_bed_NoStrandness.R
// ###### use diploid genome mapping to calculate expression level ###
// ### examine the distribution of the ratio (non biased organ with highest expression level rpkm)/ biased organ rpkm
// # 1. identify the organ specific domains
// # 2. calculate rpkm of the block from all organs
// # 3. calculate the ratio (non biased organ with highest expression level rpkm)/ biased organ rpkm
// # 4. make histogram
// ###
// wait_a_second() {
// joblist=($(jobs -p))
// while (( ${#joblist[*]} >= 10 ))
// do
// sleep 1
// joblist=($(jobs -p))
// done
// }
// PL=/workdir/sc2457/alleleDB/alleledb_pipeline_mouse
// MAPS=/workdir/sc2457/mouse_AlleleSpecific/mouse_genome.sanger.ac.uk/REL-1505-SNPs_Indels/PersonalGenome_P.CAST_M.B6_indelsNsnps_CAST.bam/%s_P.CAST.EiJ_M.C57BL.6J.map
// FDR_SIMS=10
// FDR_CUTOFF=0.1
// BinomialTest_expression(){
// # BinomialTest $f $j $MAT_READ_BED $PAT_READ_BED
// f=$1
// j=$2
// MAT_READ_BED=$3
// PAT_READ_BED=$4
// IDENTICAL_READ_BED=$5
// bedtools coverage -s -a $f -b ${MAT_READ_BED} -sorted| awk 'BEGIN {OFS="\t"; t="_"} {print $1t$2t$3, $1,$2,$3,$7,$8,$9,$10}' |LC_ALL=C sort -k1,1V -k2,2n > ${j}.mat_cov.bed &
// bedtools coverage -s -a $f -b ${PAT_READ_BED} -sorted| awk 'BEGIN {OFS="\t"; t="_"} {print $1t$2t$3, $1,$2,$3,$7,$8,$9,$10}' |LC_ALL=C sort -k1,1V -k2,2n > ${j}.pat_cov.bed &
// bedtools coverage -s -a $f -b ${IDENTICAL_READ_BED} -sorted| awk 'BEGIN {OFS="\t"; t="_"} {print $1t$2t$3, $1,$2,$3,$7,$8,$9,$10}' |LC_ALL=C sort -k1,1V -k2,2n > ${j}.iden_cov.bed &
// wait
// # join mat, pat, iden read count
// join -t $'\t' -j 1 -o 1.1,1.2,1.3,1.4,1.5,2.5 ${j}.mat_cov.bed ${j}.pat_cov.bed > ${j}.temp_cov.bed
// join -t $'\t' -j 1 -o 1.1,1.2,1.3,1.4,1.5,1.6,2.5 ${j}.temp_cov.bed ${j}.iden_cov.bed |\
// awk 'BEGIN{OFS="\t"; t=","} ($5>$6) {print $2, $3, $4, "M"t$5t$6, $5, $6, $7 } ($5<$6) {print $2, $3, $4, "P"t$5t$6, $5, $6, $7} ($5==$6) {print $2, $3, $4, "S"t$5t$6, $5, $6, $7} ' \
// | LC_ALL=C sort -k1,1V -k2,2n --parallel=30 > ${j}.merged_cov.bed
// mv ${j}.temp_cov.bed ${j}.mat_cov.bed ${j}.pat_cov.bed ${j}.iden_cov.bed toremove
// # output of BinomialTestFor_merged_cov.bed.py: (hmm+BinomialTest) if p-value <= 0.05, keep the state it got from hmm (can be M, P, or S); otherwise S.
// python ${PL}/BinomialTestFor_merged_cov.bed.py ${j}.merged_cov.bed ${j}_binomtest.bed
// mv ${j}.merged_cov.bed toremove
// python ${PL}/FalsePosFor_merged_cov.bed.py ${j}_binomtest.bed ${FDR_SIMS} ${FDR_CUTOFF} > ${j}_binomtest_FDR${FDR_CUTOFF}.txt &
// # awk 'NR==1 { print $0 } NR>1 && ($9+0) <= thresh { print $0 }' thresh=$(awk 'END {print $6}' ${j}_binomtest_FDR${FDR_CUTOFF}.txt) < ${j}_binomtest.bed > ${j}_interestingHets.bed
// }
// cd /workdir/sc2457/F1_Tissues/ImprintingOrGenetics/Combined_MB6andPB6/AllelicBiase_expressionLevel_diploidGenome_domain
// ln -s /workdir/sc2457/F1_Tissues/3rd_batch/map2ref/map2ref_bed/ .
// # 1. identify organ specific domains
// f=T8_2Strand_p0.05_effect_strain.bed_cluster
// grep -v , ../${f} | cut -f 3- |LC_ALL=C sort -k1,1V -k2,2n > ${f}_organSpecific
// domain=${f}_organSpecific
// # 2. calculate rpkm of domains from all organs
// cat ${domain} |awk 'BEGIN{OFS="\t"} {print $0, ".", "+"}' > ${domain}_plus
// cat ${domain} |awk 'BEGIN{OFS="\t"} {print $0, ".", "-"}' > ${domain}_minus
// ## map reads from each sample to the organ specific domains
// for Head in BN HT SK SP KD LV GI ST
// do
// echo $Head
// bed_dir=map2ref_bed
// for f in ${bed_dir}/${Head}_MB6_*_R1.mat.bowtie.gz_AMBremoved_sorted_identical.map2ref.sorted.bed.gz #each samples from MB6 of the same tissue
// do PREFIX=`echo $f|cut -d . -f 1`
// echo $PREFIX
// MAT_READ_BED=${PREFIX}.mat.bowtie.gz_AMBremoved_sorted_specific.map2ref.sorted.bed.gz
// PAT_READ_BED=${PREFIX}.pat.bowtie.gz_AMBremoved_sorted_specific.map2ref.sorted.bed.gz
// IDENTICAL_READ_BED=${PREFIX}.mat.bowtie.gz_AMBremoved_sorted_identical.map2ref.sorted.bed.gz
// P=`echo $PREFIX| cut -d / -f 2`
// rm ${P}_domain_plus.bed ${P}_domain_minus.bed
// ln -s ${domain}_plus ${P}_domain_plus.bed
// ln -s ${domain}_minus ${P}_domain_minus.bed
// BinomialTest_expression ${P}_domain_plus.bed ${P}_domain_plus ${MAT_READ_BED} ${PAT_READ_BED} ${IDENTICAL_READ_BED} &
// BinomialTest_expression ${P}_domain_minus.bed ${P}_domain_minus ${MAT_READ_BED} ${PAT_READ_BED} ${IDENTICAL_READ_BED} &
// wait_a_second
// done
// for f in ${bed_dir}/${Head}_PB6_*_R1.mat.bowtie.gz_AMBremoved_sorted_identical.map2ref.sorted.bed.gz #each samples from PB6 of the same tissue
// do PREFIX=`echo $f|cut -d . -f 1`
// echo $PREFIX
// # switch -m and -p for PB6
// PAT_READ_BED=${PREFIX}.mat.bowtie.gz_AMBremoved_sorted_specific.map2ref.sorted.bed.gz
// MAT_READ_BED=${PREFIX}.pat.bowtie.gz_AMBremoved_sorted_specific.map2ref.sorted.bed.gz
// IDENTICAL_READ_BED=${PREFIX}.mat.bowtie.gz_AMBremoved_sorted_identical.map2ref.sorted.bed.gz
// P=`echo $PREFIX| cut -d / -f 2`
// rm ${P}_domain_plus.bed ${P}_domain_minus.bed
// ln -s ${domain}_plus ${P}_domain_plus.bed
// ln -s ${domain}_minus ${P}_domain_minus.bed
// BinomialTest_expression ${P}_domain_plus.bed ${P}_domain_plus ${MAT_READ_BED} ${PAT_READ_BED} ${IDENTICAL_READ_BED} &
// BinomialTest_expression ${P}_domain_minus.bed ${P}_domain_minus ${MAT_READ_BED} ${PAT_READ_BED} ${IDENTICAL_READ_BED} &
// wait_a_second
// done
// done
// ## make a table of expression level in counts
// for Head in BN HT SK SP KD LV GI ST
// do for cross in MB6 PB6
// do for s in plus minus
// do echo -e 'chrm\tchrmStart\tchrmEnd\t'${Head}_${cross}_${s} > ${Head}_${cross}_${s}_readcounts.txt
// cat ${Head}_${cross}_all_R1_domain_${s}_binomtest.bed | awk 'BEGIN {OFS="\t"; t="_"} NR>1 {print $1,$2,$3, $6+$7+$8}' >> ${Head}_${cross}_${s}_readcounts.txt
// done
// done
// done
// # combine counts from plus and minus strand
// # combine counts from PB6 and MB6
// # 3. calculate the ratio (non biased organ with highest expression level rpkm)/ biased organ rpkm
// # 4. make histogram
// Rscript getCounts_combine_multiple_txtfiles.R
###### use diploid genome mapping to calculate expression level ###
### examine the distribution of the ratio (non biased organ with highest expression level rpkm)/ biased organ rpkm
# 1. identify blocks within the organ specific domains
# 2. calculate rpkm of the block from all organs
# 3. calculate the ratio (non biased organ with highest expression level rpkm)/ biased organ rpkm
# 4. make histogram
###
#######################################
# Throttle helper: block until the number of running background jobs of the
# current shell drops below a limit, polling once per second.
# Arguments:
#   $1 - (optional) maximum number of concurrent jobs; defaults to 10.
# Returns:
#   0 once fewer than the allowed number of jobs are running.
#######################################
wait_a_second() {
  local max_jobs=${1:-10}
  local -a running
  # -r lists only jobs that are still running; -p prints one PID per job.
  running=($(jobs -rp))
  while (( ${#running[@]} >= max_jobs )); do
    sleep 1
    running=($(jobs -rp))
  done
}
# Directory of the alleleDB pipeline; the python helpers are called as ${PL}/<script>.py
PL='/workdir/sc2457/alleleDB/alleledb_pipeline_mouse'
# Path template with a literal %s placeholder for the P.CAST / M.B6 .map files.
# NOTE(review): not referenced by the code visible in this file — confirm it is still needed.
MAPS='/workdir/sc2457/mouse_AlleleSpecific/mouse_genome.sanger.ac.uk/REL-1505-SNPs_Indels/PersonalGenome_P.CAST_M.B6_indelsNsnps_CAST.bam/%s_P.CAST.EiJ_M.C57BL.6J.map'
# 2nd and 3rd arguments handed to FalsePosFor_merged_cov.bed.py
FDR_SIMS='10'
FDR_CUTOFF='0.1'
#######################################
# Count maternal-specific, paternal-specific and identical reads over a set of
# regions (one strand at a time), label every region by the allele with more
# reads (M / P, or S when equal) and run the pipeline's binomial test and FDR
# simulation on the merged table.
# Globals:
#   PL, FDR_SIMS, FDR_CUTOFF (read)
# Arguments:
#   $1 - BED file of regions, passed to `bedtools coverage -s -a ... -sorted`
#   $2 - prefix used for every output file name
#   $3 - BED of maternal-specific reads
#   $4 - BED of paternal-specific reads
#   $5 - BED of reads identical between both alleles
# Outputs:
#   ${2}_binomtest.bed and ${2}_binomtest_FDR${FDR_CUTOFF}.txt in the current
#   directory; intermediate tables are moved into ./toremove.
#######################################
BinomialTest_expression(){
  # BinomialTest_expression $f $j $MAT_READ_BED $PAT_READ_BED $IDENTICAL_READ_BED
  # Everything is local: callers iterate with `for f in ...`, so a global
  # assignment here would clobber their loop variable on a foreground call.
  local f=$1
  local j=$2
  local MAT_READ_BED=$3
  local PAT_READ_BED=$4
  local IDENTICAL_READ_BED=$5
  # Prefix each coverage row with a chr_start_end key so the tables can be joined.
  # NOTE(review): columns 7-10 are assumed to be the values appended by
  # bedtools coverage to a 6-column region file — confirm for other inputs.
  local cov_fields='BEGIN {OFS="\t"; t="_"} {print $1t$2t$3, $1,$2,$3,$7,$8,$9,$10}'

  # Without the directory, `mv <file> toremove` would rename the file instead.
  mkdir -p toremove

  bedtools coverage -s -a "$f" -b "${MAT_READ_BED}" -sorted \
    | awk "$cov_fields" | LC_ALL=C sort -k1,1V -k2,2n > "${j}.mat_cov.bed" &
  bedtools coverage -s -a "$f" -b "${PAT_READ_BED}" -sorted \
    | awk "$cov_fields" | LC_ALL=C sort -k1,1V -k2,2n > "${j}.pat_cov.bed" &
  bedtools coverage -s -a "$f" -b "${IDENTICAL_READ_BED}" -sorted \
    | awk "$cov_fields" | LC_ALL=C sort -k1,1V -k2,2n > "${j}.iden_cov.bed" &
  wait

  # join mat, pat, iden read count
  join -t $'\t' -j 1 -o 1.1,1.2,1.3,1.4,1.5,2.5 "${j}.mat_cov.bed" "${j}.pat_cov.bed" > "${j}.temp_cov.bed"
  join -t $'\t' -j 1 -o 1.1,1.2,1.3,1.4,1.5,1.6,2.5 "${j}.temp_cov.bed" "${j}.iden_cov.bed" \
    | awk 'BEGIN{OFS="\t"; t=","} ($5>$6) {print $2, $3, $4, "M"t$5t$6, $5, $6, $7 } ($5<$6) {print $2, $3, $4, "P"t$5t$6, $5, $6, $7} ($5==$6) {print $2, $3, $4, "S"t$5t$6, $5, $6, $7} ' \
    | LC_ALL=C sort -k1,1V -k2,2n --parallel=30 > "${j}.merged_cov.bed"
  mv "${j}.temp_cov.bed" "${j}.mat_cov.bed" "${j}.pat_cov.bed" "${j}.iden_cov.bed" toremove

  # output of BinomialTestFor_merged_cov.bed.py:(hmm+BinomialTest) if p-value <= 0.05, keep what it got from hmm (can be M,P, or S), otherwise S.
  python "${PL}/BinomialTestFor_merged_cov.bed.py" "${j}.merged_cov.bed" "${j}_binomtest.bed"
  mv "${j}.merged_cov.bed" toremove

  # Run in the foreground: callers already background this whole function, and
  # a nested `&` would detach the FDR step from their `wait` / job throttle and
  # let the function return before the FDR file is written.
  python "${PL}/FalsePosFor_merged_cov.bed.py" "${j}_binomtest.bed" "${FDR_SIMS}" "${FDR_CUTOFF}" > "${j}_binomtest_FDR${FDR_CUTOFF}.txt"
  # awk 'NR==1 { print $0 } NR>1 && ($9+0) <= thresh { print $0 }' thresh=$(awk 'END {print $6}' ${j}_binomtest_FDR${FDR_CUTOFF}.txt) < ${j}_binomtest.bed > ${j}_interestingHets.bed
}
# 1. identify blocks within the organ specific domains (osdBlock)
# Abort if the working directory is missing, otherwise the rm / redirects
# below would act on whatever directory the shell happens to be in.
cd /workdir/sc2457/F1_Tissues/ImprintingOrGenetics/Combined_MB6andPB6 || exit 1
# pool blocks together
tmp_bed=T8_2Strand_p0.05_effect_strain_withStrandness_temp.bed
# start from an empty pool; -f keeps a first run (no stale file yet) quiet
rm -f -- "${tmp_bed}"
for head in BN SP HT SK KD ST GI LV; do
  # keep chr/start/end, tag each block with its organ (col 4) and strand (col 6)
  awk -v h="$head" 'BEGIN {OFS="\t"}{print $1,$2,$3, h, ".", "+"}' "${head}_plus_p0.05_effect_strain.bed" >> "${tmp_bed}"
  awk -v h="$head" 'BEGIN {OFS="\t"}{print $1,$2,$3, h, ".", "-"}' "${head}_minus_p0.05_effect_strain.bed" >> "${tmp_bed}"
done
sort-bed "${tmp_bed}" > T8_2Strand_p0.05_effect_strain_withStrandness.bed
cd /workdir/sc2457/F1_Tissues/ImprintingOrGenetics/Combined_MB6andPB6/AllelicBiase_expressionLevel_diploidGenome_block || exit 1
# only create the link when it is not there yet, so the script can be re-run
[[ -e map2ref_bed ]] || ln -sf /workdir/sc2457/F1_Tissues/3rd_batch/map2ref/map2ref_bed/ .
#f=T8_2Strand_p0.05_effect_strain.bed_cluster
#grep -v , ../${f} | cut -f 3- |LC_ALL=C sort -k1,1V -k2,2n > ../${f}_organSpecific
# keep the stranded blocks that overlap an organ-specific domain
intersectBed -u -a ../T8_2Strand_p0.05_effect_strain_withStrandness.bed -b ../T8_2Strand_p0.05_effect_strain.bed_cluster_organSpecific > T8_2Strand_p0.05_effect_strain_withStrandness.bed_organSpecific
# merge overlapping blocks in organ-specific domains, keep strandness
# not used for the calculation. Just to check whether an organ-specific-biased domain can be biased on both plus and minus strand? YES!
bedtools merge -i T8_2Strand_p0.05_effect_strain_withStrandness.bed_organSpecific -c 4,6 -o distinct -d 0 |awk 'BEGIN {OFS="\t";m=":";d="-"}{print ($3-$2)/1000000, $1m$2d$3, $0 }' > T8_2Strand_p0.05_effect_strain_withStrandness.bed_organSpecific_cluster
# use blocks within organ-specific-biased domains
osdBlock=T8_2Strand_p0.05_effect_strain_withStrandness.bed_organSpecific
# 2. calculate rpkm (Reads per kilo base per million mapped reads) of osdBlock blocks in organ specific domain from all organs
# need to separate plus and minus strands because BinomialTest_expression can only handle one strand at a time
# Select on the strand column (6) itself: a bare `grep +` / `grep -` would
# also match the character anywhere else on the line.
awk -F'\t' '$6 == "+"' "${osdBlock}" | LC_ALL=C sort -k1,1V -k2,2n > "${osdBlock}_plus"
awk -F'\t' '$6 == "-"' "${osdBlock}" | LC_ALL=C sort -k1,1V -k2,2n > "${osdBlock}_minus"
## map reads from each sample to the osdBlock
# For every tissue and cross, run BinomialTest_expression on the plus and the
# minus osdBlock files of each sample, throttled by wait_a_second.
mkdir -p toremove
bed_dir=map2ref_bed
for Head in BN HT SK SP KD LV GI ST; do
  echo "$Head"
  for cross in MB6 PB6; do
    # each sample from this cross of the same tissue
    for f in "${bed_dir}/${Head}_${cross}_"*"_R1.mat.bowtie.gz_AMBremoved_sorted_identical.map2ref.sorted.bed.gz"; do
      # an unmatched glob stays literal; skip it instead of processing a bogus name
      [[ -e "$f" ]] || continue
      PREFIX=${f%%.*}      # path up to the first dot, e.g. map2ref_bed/<sample>_R1
      echo "$PREFIX"
      if [[ "$cross" == MB6 ]]; then
        MAT_READ_BED=${PREFIX}.mat.bowtie.gz_AMBremoved_sorted_specific.map2ref.sorted.bed.gz
        PAT_READ_BED=${PREFIX}.pat.bowtie.gz_AMBremoved_sorted_specific.map2ref.sorted.bed.gz
      else
        # switch -m and -p for PB6
        PAT_READ_BED=${PREFIX}.mat.bowtie.gz_AMBremoved_sorted_specific.map2ref.sorted.bed.gz
        MAT_READ_BED=${PREFIX}.pat.bowtie.gz_AMBremoved_sorted_specific.map2ref.sorted.bed.gz
      fi
      IDENTICAL_READ_BED=${PREFIX}.mat.bowtie.gz_AMBremoved_sorted_identical.map2ref.sorted.bed.gz
      P=${PREFIX##*/}      # sample name without the directory part
      # refresh the per-sample links; -f so a first run does not complain
      rm -f -- "${P}_osdBlock_plus.bed" "${P}_osdBlock_minus.bed"
      ln -s "${osdBlock}_plus" "${P}_osdBlock_plus.bed"
      ln -s "${osdBlock}_minus" "${P}_osdBlock_minus.bed"
      BinomialTest_expression "${P}_osdBlock_plus.bed" "${P}_osdBlock_plus" "${MAT_READ_BED}" "${PAT_READ_BED}" "${IDENTICAL_READ_BED}" &
      BinomialTest_expression "${P}_osdBlock_minus.bed" "${P}_osdBlock_minus" "${MAT_READ_BED}" "${PAT_READ_BED}" "${IDENTICAL_READ_BED}" &
      wait_a_second
    done
  done
done
# wait_a_second only throttles; jobs can still be running here. Block until
# all of them are done so the steps that read *_binomtest.bed see complete files.
wait
## make a table of expression level in counts
# One file per tissue x cross: a header row, then chr/start/end/strand and the
# sum of columns 6-8 of every data row of the plus and minus *_binomtest.bed.
for Head in BN HT SK SP KD LV GI ST; do
  for cross in MB6 PB6; do
    sample=${Head}_${cross}
    {
      echo -e 'chrm\tchrmStart\tchrmEnd\tchrStrand\t'${Head}_${cross}
      awk 'BEGIN {OFS="\t"; t="_"} NR>1 {print $1,$2,$3, "+", $6+$7+$8}' "${sample}_all_R1_osdBlock_plus_binomtest.bed"
      awk 'BEGIN {OFS="\t"; t="_"} NR>1 {print $1,$2,$3, "-", $6+$7+$8}' "${sample}_all_R1_osdBlock_minus_binomtest.bed"
    } > "${sample}_readcounts.txt"
  done
done
# combine counts from plus and minus strand
# combine counts from PB6 and MB6
# 3. calculate the ratio (non biased organ with highest expression level rpkm)/ biased organ rpkm
# 4. make histogram
# Rscript getCounts_combine_multiple_txtfiles.R
Rscript getNonBiasedHighest_Biased_TotalReadCountRatio.R
### Are those expressed really not biased??? Or are we too stringent?
# combine allele-specific reads from MB6 and PB6 to perform binomial test
cd /workdir/sc2457/F1_Tissues/ImprintingOrGenetics/Combined_MB6andPB6/AllelicBiase_expressionLevel_diploidGenome_block/MisB6_PisCAST || exit 1
# create the links only when missing so the section can be re-run
[[ -e map2ref_bed ]] || ln -sf /workdir/sc2457/F1_Tissues/3rd_batch/map2ref/map2ref_bed/ .
osdBlock=T8_2Strand_p0.05_effect_strain_withStrandness.bed_organSpecific #blocks within the organ specific domains (osdBlock)
[[ -e "${osdBlock}" ]] || ln -sf "../${osdBlock}" .
# The per-strand loop further down links ${osdBlock}_plus / ${osdBlock}_minus
# relative to this directory, but the script only ever writes those two files
# in the parent directory; make them reachable here unless already present.
# NOTE(review): assumes ../${osdBlock}_plus and ../${osdBlock}_minus exist — confirm.
for strand in plus minus; do
  [[ -e "${osdBlock}_${strand}" ]] || ln -sf "../${osdBlock}_${strand}" .
done
mkdir -p toremove
PL=/workdir/sc2457/alleleDB/alleledb_pipeline_mouse
MAPS=/workdir/sc2457/mouse_AlleleSpecific/mouse_genome.sanger.ac.uk/REL-1505-SNPs_Indels/PersonalGenome_P.CAST_M.B6_indelsNsnps_CAST.bam/%s_P.CAST.EiJ_M.C57BL.6J.map
FDR_SIMS=10
FDR_CUTOFF=0.1
#######################################
# Throttle helper (redefinition with a lower default): block until the number
# of running background jobs of the current shell drops below a limit,
# polling once per second.
# Arguments:
#   $1 - (optional) maximum number of concurrent jobs; defaults to 5.
# Returns:
#   0 once fewer than the allowed number of jobs are running.
#######################################
wait_a_second() {
  local max_jobs=${1:-5}
  local -a running
  # -r lists only jobs that are still running; -p prints one PID per job.
  running=($(jobs -rp))
  while (( ${#running[@]} >= max_jobs )); do
    sleep 1
    running=($(jobs -rp))
  done
}
#######################################
# Binomial test on allele-specific reads only: count maternal- and
# paternal-specific reads over a set of regions, label every region by the
# allele with more reads (M / P, or S when equal) and run the pipeline's
# binomial test and FDR simulation. Identical reads are ignored; the 7th
# column of the merged table is the literal "-".
# Takes strandness into account but processes one strand (plus or minus) per call.
# Globals:
#   PL, FDR_SIMS, FDR_CUTOFF (read)
# Arguments:
#   $1 - BED file of regions, passed to `bedtools coverage -s -a ... -sorted`
#   $2 - prefix used for every output file name
#   $3 - gzip-compressed BED of maternal-specific reads (read through zcat)
#   $4 - gzip-compressed BED of paternal-specific reads (read through zcat)
# Outputs:
#   ${2}_binomtest.bed and ${2}_binomtest_FDR${FDR_CUTOFF}.txt in the current
#   directory; intermediate tables are moved into ./toremove.
#######################################
BinomialTest(){
  # BinomialTest $f $j $MAT_READ_BED $PAT_READ_BED
  # Local on purpose: the callers set MAT_READ_BED / PAT_READ_BED themselves.
  local f=$1
  local j=$2
  local MAT_READ_BED=$3
  local PAT_READ_BED=$4
  # Prefix each coverage row with a chr_start_end key so the tables can be joined.
  # NOTE(review): columns 7-10 are assumed to be the values appended by
  # bedtools coverage to a 6-column region file — confirm for other inputs.
  local cov_fields='BEGIN {OFS="\t"; t="_"} {print $1t$2t$3, $1,$2,$3,$7,$8,$9,$10}'

  # Without the directory, `mv <file> toremove` would rename the file instead.
  mkdir -p toremove

  bedtools coverage -s -a "$f" -b <(zcat "${MAT_READ_BED}") -sorted \
    | awk "$cov_fields" | LC_ALL=C sort -k1,1V -k2,2n > "${j}.mat_cov.bed" &
  bedtools coverage -s -a "$f" -b <(zcat "${PAT_READ_BED}") -sorted \
    | awk "$cov_fields" | LC_ALL=C sort -k1,1V -k2,2n > "${j}.pat_cov.bed" &
  wait

  # keep every block, including blocks with 0 allele-specific reads
  join -t $'\t' -j 1 -o 1.1,1.2,1.3,1.4,1.5,2.5 "${j}.mat_cov.bed" "${j}.pat_cov.bed" \
    | awk 'BEGIN{OFS="\t"; t=","} ($5>$6) {print $2, $3, $4, "M"t$5t$6, $5, $6, "-" } ($5<$6) {print $2, $3, $4, "P"t$5t$6, $5, $6, "-"} ($5==$6) {print $2, $3, $4, "S"t$5t$6, $5, $6, "-"} ' \
    | LC_ALL=C sort -k1,1V -k2,2n --parallel=30 > "${j}.merged_cov.bed"
  mv "${j}.mat_cov.bed" "${j}.pat_cov.bed" toremove

  # output of BinomialTestFor_merged_cov.bed.py:(hmm+BinomialTest) if p-value <= 0.05, keep what it got from hmm (can be M,P, or S), otherwise S.
  python "${PL}/BinomialTestFor_merged_cov.bed.py" "${j}.merged_cov.bed" "${j}_binomtest.bed"
  mv "${j}.merged_cov.bed" toremove

  # Run in the foreground: callers already background this whole function, and
  # a nested `&` would detach the FDR step from their `wait` / job throttle and
  # let the function return before the FDR file is written.
  python "${PL}/FalsePosFor_merged_cov.bed.py" "${j}_binomtest.bed" "${FDR_SIMS}" "${FDR_CUTOFF}" > "${j}_binomtest_FDR${FDR_CUTOFF}.txt"
  # awk 'NR==1 { print $0 } NR>1 && ($9+0) <= thresh { print $0 }' thresh=$(awk 'END {print $6}' ${j}_binomtest_FDR${FDR_CUTOFF}.txt) < ${j}_binomtest.bed > ${j}_interestingHets.bed
}
# Pool MB6 and PB6 reads together. mat is B6 reads, pat is Cast reads
# Per tissue: concatenate the MB6 and PB6 allele-specific read files of one
# allele, re-sort them and gzip the result; both alleles run in parallel.
for Head in BN HT SK SP KD LV GI ST; do
  echo $Head
  bed_dir=map2ref_bed
  for allele in mat pat; do
    zcat ${bed_dir}/${Head}_MB6_all_R1.${allele}.bowtie.gz_AMBremoved_sorted_specific.map2ref.sorted.bed.gz \
         ${bed_dir}/${Head}_PB6_all_R1.${allele}.bowtie.gz_AMBremoved_sorted_specific.map2ref.sorted.bed.gz \
      | LC_ALL=C sort -k1,1V -k2,2n \
      | gzip > ${Head}_${allele}_temp.gz &
  done
  # finish both alleles of this tissue before starting the next one
  wait
done
# Perform BinomialTest one strand at a time, using pooled MB6 and PB6 reads
for Head in BN HT SK SP KD LV GI ST; do
  MAT_READ_BED=${Head}_mat_temp.gz
  PAT_READ_BED=${Head}_pat_temp.gz
  P=${Head}_PoolMB6PB6
  for strand in plus minus; do
    # -f: replace a link left over from a previous run instead of failing
    ln -sf "${osdBlock}_${strand}" "${P}_osdBlock_${strand}.bed"
    BinomialTest "${P}_osdBlock_${strand}.bed" "${P}_osdBlock_${strand}" "${MAT_READ_BED}" "${PAT_READ_BED}" &
  done
  wait_a_second
done
# wait_a_second only throttles; jobs can still be running here. Block until
# all of them are done so the tables built from *_binomtest.bed are complete.
wait
### examine the p-value of the non-biased organ with highest expression in the organ specific domains
## use pool reads p-value
# make a table of p-value
# One file per tissue: a header row, then chr/start/end/strand and column 9 of
# every data row of the pooled plus and minus *_binomtest.bed files.
for Head in BN HT SK SP KD LV GI ST; do
  pooled=${Head}_PoolMB6PB6_osdBlock
  {
    echo -e 'chrm\tchrmStart\tchrmEnd\tchrStrand\t'${Head}
    awk 'BEGIN {OFS="\t"; t="_"} NR>1 {print $1,$2,$3, "+", $9}' "${pooled}_plus_binomtest.bed"
    awk 'BEGIN {OFS="\t"; t="_"} NR>1 {print $1,$2,$3, "-", $9}' "${pooled}_minus_binomtest.bed"
  } > "${Head}_pValue.txt"
done
# make a table of allele-specific reads: chrm chrmStart chrmEnd chrStrand ${Head}.win ${Head}.mat ${Head}.pat
# .win is the first character of column 4 of the *_binomtest.bed row;
# .mat and .pat are its columns 6 and 7.
for Head in BN HT SK SP KD LV GI ST; do
  pooled=${Head}_PoolMB6PB6_osdBlock
  {
    echo -e 'chrm\tchrmStart\tchrmEnd\tchrStrand\t'${Head}'.win\t'${Head}'.mat\t'${Head}'.pat'
    awk 'BEGIN {OFS="\t"; t="_"} NR>1 {print $1,$2,$3, "+", substr($4,1,1), $6, $7}' "${pooled}_plus_binomtest.bed"
    awk 'BEGIN {OFS="\t"; t="_"} NR>1 {print $1,$2,$3, "-", substr($4,1,1), $6, $7}' "${pooled}_minus_binomtest.bed"
  } > "${Head}_AlleleSpecificReads.txt"
done
Rscript getNonBiasedHighest_Biased_AllelicBiaseDistribution.R