-
Notifications
You must be signed in to change notification settings - Fork 2
/
fastqcParser.pl
executable file
·79 lines (70 loc) · 1.88 KB
/
fastqcParser.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#!/usr/bin/env perl
=hey
Author: Shijian Sky Zhang
E-mail: zhangsjsky@pku.edu.cn
=cut
use 5.010;
use warnings;
use strict;
use Getopt::Long;
use File::Basename;
use lib dirname $0;
use pm::common;
# Print the command-line help text for this script.
#
# The text goes to the currently selected output handle (STDOUT unless the
# caller has changed it). The script's own file name, without its directory
# part, is substituted into the usage line. Takes no arguments; the value
# returned is whatever print returns.
sub usage {
    my $program = basename($0);
    return print <<"HELP";
Usage: perl $program OPTION fastqc_data.txt >OUTPUT.tsv
If fastqc_data.txt isn't specified, input from STDIN
Option:
-h --help Print this help information
HELP
}
# Parse command-line options.
#   -h / --help : print the help text and exit successfully.
# An unrecognised option is a usage error. The help text is then written to
# STDERR, because STDOUT is expected to be redirected into OUTPUT.tsv, and the
# script stops with a non-zero status instead of going on to parse input.
GetOptions(
    'h|help' => sub { usage(); exit 0 },
) or do {
    # usage() prints to the currently selected handle, so select STDERR
    # around the call and restore the previous handle afterwards.
    my $previousHandle = select STDERR;
    usage();
    select $previousHandle;
    exit 1;
};
# Extract the summary values from a FastQC "fastqc_data.txt" report.
#
# Argument : $in - an open, readable filehandle positioned at the start of
#            the report.
# Returns  : the list ($totalSeq, $seqLength, $gcContent, $meanQual).
#            A value whose line never appears in the input is undef.
#            $seqLength is either a single number or a "min-max" range.
#            $meanQual is the per-base mean quality averaged over the read
#            length, or an explanatory message when the length is a range.
sub parseFastqcData {
    my ($in) = @_;
    my ($totalSeq, $seqLength, $gcContent, $meanQual);

    while (defined(my $line = <$in>)) {
        $line =~ s/\r?\n\z//;    # like chomp, but also drops a CR from CRLF input

        # FastQC labels this row "Total Sequences"; the singular spelling is
        # still accepted so input that matched before keeps matching.
        if ($line =~ /^Total Sequences?\s+(\d+)$/) {
            $totalSeq = $1;
        }
        # The length is a range such as "35-76" when reads are not uniform.
        elsif ($line =~ /^Sequence length\s+(\d+(?:-\d+)?)$/) {
            $seqLength = $1;
        }
        elsif ($line =~ /^%GC\s+(\d+)$/) {
            $gcContent = $1;
        }
        elsif ($line =~ /^>>Per base sequence quality/ && defined $seqLength) {
            if ($seqLength =~ /-/) {
                $meanQual = "Not available when read lengths aren't uniform";
            }
            elsif ($seqLength > 0) {    # guards the division below
                my $totalQual = 0;
                while (defined(my $row = <$in>)) {
                    $row =~ s/\r?\n\z//;
                    last if $row eq '>>END_MODULE';
                    next if $row =~ /^#/;    # column header line
                    my ($pos, $mean) = split /\t/, $row;
                    next unless defined $mean;
                    # A position may be a single base ("7") or a bin of
                    # several bases ("10-14"); weight a bin by its width.
                    my ($from, $to) = split /-/, $pos;
                    $totalQual += defined $to ? ($to - $from + 1) * $mean : $mean;
                }
                $meanQual = $totalQual / $seqLength;
            }
        }
    }
    return ($totalSeq, $seqLength, $gcContent, $meanQual);
}

# Read the report named on the command line, or STDIN when no file is given
# or the name is "-", then print one "label<TAB>value" line per summary value.
$ARGV[0] //= '-';
my $in;
if ($ARGV[0] eq '-') {
    $in = \*STDIN;
}
else {
    # Three-argument open: the file name is never interpreted as a mode or pipe.
    open $in, '<', $ARGV[0] or die "Can't read file ($ARGV[0]): $!";
}
my ($totalSeq, $seqLength, $gcContent, $meanQual) = parseFastqcData($in);
close $in unless $ARGV[0] eq '-';

say join "\t", ("Reads", $totalSeq);
say join "\t", ("Read Length", $seqLength);
say join "\t", ("GC Content", $gcContent);
say join "\t", ("Mean Quality", $meanQual);