-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy path2_getLargeData.pl
65 lines (56 loc) · 1.67 KB
/
2_getLargeData.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#!/usr/bin/perl
# -----------------------------------------
# Updated Date: 2014/03/24
# Input: several sub-files generated by extractLargeData.pl
# Output: the collected control or treatment data, with a count for each item (transcript)
# Environment: Linux or Windows
# Description: Generate the collected control or treatment file listing each transcript with its count.
# -----------------------------------------
use strict;
use warnings;

# Count how often each transcript name (third tab-separated column) occurs on
# every other line of the input files, then write one "<name>\t<count>" line
# per distinct name to the output file.
#
# Command line: <total passed files> <output.sam> <check1.sam> [<check2.sam> ...]
#   $ARGV[0]    - total passed files; accepted for interface compatibility but
#                 never read by this script.
#   $ARGV[1]    - output file (created or truncated).
#   $ARGV[2..]  - input files, processed in the order given.
#
# Dies with a usage message when fewer than three arguments are given, and
# with an error message when an input or the output file cannot be opened or
# the output cannot be completely written.
if (scalar(@ARGV) < 3) {
    die("Usage: perl ./getTranscript.pl <total passed files> <output.sam> <check1.sam> [<check2.sam>, [<check3.sam>]]\n");
}

my (undef, $output_path, @input_paths) = @ARGV;

# Occurrence count per transcript name.
my %count_of = ();

# Toggle: false -> count the current line, true -> skip it.
# It is deliberately NOT reset between input files: per the original author's
# note, the inputs are pieces of one divided SAM file, so the alternation has
# to continue across file boundaries.
# NOTE(review): presumably the skipped line is the second record of a read
# pair -- confirm against extractLargeData.pl.
my $skip_line = 0;

for my $input_path (@input_paths) {
    # Three-argument open with a lexical handle: the file name can never be
    # interpreted as an open mode or a piped command.
    open(my $in, '<', $input_path)
        or die("Error: Make sure that $input_path exists.\n");

    # Progress output: name of the file being processed.
    print "$input_path\n";

    # Read line by line; a list-context read would load the whole (large)
    # file into memory before the first line is processed.
    while (my $line = <$in>) {
        chomp($line);

        if ($skip_line) {
            $skip_line = 0;
            next;
        }
        $skip_line = 1;

        my @fields = split(/\t/, $line);

        # Lines with fewer than three fields are counted under the empty
        # name, exactly as an undefined hash key would be.
        my $name = defined($fields[2]) ? $fields[2] : '';
        $count_of{$name}++;
    }

    close($in);
}

open(my $out, '>', $output_path)
    or die("Error: Output file went wrong.\n");

# Output order is whatever order the hash yields; it is not sorted.
for my $name (keys(%count_of)) {
    print {$out} "$name\t$count_of{$name}\n";
}

# Buffered write errors (e.g. disk full) only surface when the handle is closed.
close($out)
    or die("Error: Could not finish writing $output_path: $!\n");