forked from ArimaGenomics/mapping_pipeline
-
Notifications
You must be signed in to change notification settings - Fork 0
/
filter_five_end.pl
108 lines (101 loc) · 3.2 KB
/
filter_five_end.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#!/usr/bin/perl
use strict;
use warnings;
# Reads tab-separated alignment records (SAM-style: "@" header lines, then
# records whose 1st column is the read id, 2nd the bitwise flag and 6th the
# CIGAR string) from STDIN and writes exactly one record per read to STDOUT.
#
# Header lines are passed through unchanged.  Consecutive records with the
# same read id form one group, so all records of a read must be adjacent in
# the input.  For every group:
#   * if the group holds one or two records and exactly one of them is
#     classified as the 5'-end alignment, that record is printed unchanged;
#   * otherwise the first record of the group is printed with the "unmapped"
#     flag bit (0x4) switched on.
# Empty or header-only input produces no record lines.

use constant {
    FLAG_UNMAPPED => 0x4,     # bit tested/set to mark a record as unmapped
    FLAG_REVERSE  => 0x10,    # reverse-strand bit in the SAM specification
};

my $prev_id = "";
my @group;    # every record of the read currently being collected
my @five;     # the subset of @group classified as 5'-end alignments

while (my $line = <STDIN>) {
    chomp $line;

    # Header lines are copied through untouched.
    if ($line =~ /^@/) {
        print($line . "\n");
        next;
    }

    my ($id, $flag, undef, undef, undef, $cigar) = split(/\t/, $line);

    # A change of read id closes the previous group.
    if ($prev_id ne "" && $prev_id ne $id) {
        emit_read_group(\@group, \@five);
        @group = ();
        @five  = ();
    }

    $prev_id = $id;
    push(@group, $line);
    push(@five, $line) if is_five_prime_alignment($flag, $cigar);
}

# Flush the last group; emit_read_group() is a no-op when nothing was read,
# so empty input no longer yields a bogus record.
emit_read_group(\@group, \@five);

# Decide whether a record is the alignment covering the 5' end of its read.
#
#   $flag  - numeric bitwise flag of the record
#   $cigar - CIGAR string of the record
#
# Returns 1 when the record is not flagged unmapped and either
#   - the reverse bit is clear and the CIGAR begins with a match ("<n>M..."), or
#   - the reverse bit is set and the CIGAR ends with a match ("...M");
# returns 0 otherwise.
sub is_five_prime_alignment {
    my ($flag, $cigar) = @_;

    return 0 if ($flag & FLAG_UNMAPPED);

    if ($flag & FLAG_REVERSE) {
        return $cigar =~ m/.*M$/ ? 1 : 0;
    }
    return $cigar =~ m/^[0-9]*M/ ? 1 : 0;
}

# Print the single output record for one group of same-id records.
#
#   $group - array ref: all records of the read, in input order
#   $five  - array ref: the records of $group classified as 5'-end alignments
#
# Prints nothing for an empty group.
sub emit_read_group {
    my ($group, $five) = @_;

    return if !@{$group};

    # Unambiguous case: at most two records and exactly one 5'-end candidate.
    if (@{$group} <= 2 && @{$five} == 1) {
        print($five->[0] . "\n");
        return;
    }

    # Ambiguous case: keep the first record but mark it as unmapped.
    # Limit -1 keeps trailing empty columns so only the flag column changes.
    my @fields = split(/\t/, $group->[0], -1);
    $fields[1] = int($fields[1]) | FLAG_UNMAPPED;
    print(join("\t", @fields) . "\n");
    return;
}
# Convert a non-negative integer into its 32-character binary string,
# most significant bit first (e.g. 5 -> "000...0101").
sub dec2bin {
    my ($number) = @_;

    # "N" packs the value as a 32-bit big-endian unsigned integer;
    # "B32" then reads those four bytes back as 32 bit characters.
    my $packed = pack("N", $number);
    return unpack("B32", $packed);
}
# Convert a string of "0"/"1" characters (most significant bit first) into
# the integer it represents.  Only the last 32 characters are used; shorter
# strings are left-padded with zeros.
sub bin2dec {
    my ($bits) = @_;

    # Left-pad so that at least 32 characters exist, then keep the last 32.
    my $padded = ("0" x 32) . $bits;
    my $last32 = substr($padded, -32);

    # "B32" packs the bit characters into four bytes; "N" reads them back
    # as a 32-bit big-endian unsigned integer.
    return unpack("N", pack("B32", $last32));
}