Skip to content

Commit

Permalink
updated dhtslib, dparasail, and now we add PG header line, closes #1, c…
Browse files Browse the repository at this point in the history
…loses #17
  • Loading branch information
charlesgregory committed Apr 28, 2021
1 parent 5b85f5b commit 4a3e28d
Show file tree
Hide file tree
Showing 8 changed files with 119 additions and 102 deletions.
4 changes: 2 additions & 2 deletions dub.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
"description": "A minimal D application.",
"license": "MIT",
"dependencies": {
"dparasail":"~>0.0.7",
"dhtslib": "~>0.11.1+htslib-1.10"
"dparasail":"~>0.1.2",
"dhtslib": "~>0.12.3+htslib-1.10"
},
"configurations":[
{
Expand Down
46 changes: 22 additions & 24 deletions source/analysis.d
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,14 @@ struct Align_Result

/// Align the soft-clip to the read region or the mate region
Align_Result align_clip(bool left)(SAMReader* bam, IndexedFastaFile* fai, Parasail* p,
SAMRecord* rec, ReadStatus* status, uint clip_len, Mutex* m,
SAMRecord rec, ReadStatus* status, uint clip_len, Mutex* m,
int artifact_floor_length, int align_buffer_size)
{
string q_seq;
string ref_seq;
float cutoff;
long start, end;
parasail_query res;
// parasail_query res;
Align_Result alignment;

//if clip too short
Expand All @@ -50,43 +50,41 @@ Align_Result align_clip(bool left)(SAMReader* bam, IndexedFastaFile* fai, Parasa
start = 0;
}

end = rec.pos() + rec.cigar.ref_bases_covered() + align_buffer_size;
end = rec.pos() + rec.cigar.alignedLength() + align_buffer_size;

//if end>length of chrom: end is length of chrom
if (end > bam.target_lens[rec.tid])
if (end > bam.header.targetLength(rec.tid))
{
end = bam.target_lens[rec.tid];
end = bam.header.targetLength(rec.tid);
}

m.lock();
//get read region seq
ref_seq = fai.fetchSequence(bam.target_names[rec.tid], start, end).toUpper;
ref_seq = fai.fetchSequence(bam.header.targetName(rec.tid).idup, start, end).toUpper;
m.unlock();

//align
res = p.sw_striped(q_seq, ref_seq);
scope (exit)
res.close();
auto res = p.sw_striped(q_seq, ref_seq);
// ClipStatus clip = left ? status.left : status.right;
if ((res.cigar.ops.length == 0) | (res.cigar.ops.length > 10))
if ((res.cigar.length == 0) | (res.cigar.length > 10))
return alignment;

static if (left)
{
if (res.cigar.ops[$ - 1].op == Ops.EQUAL)
if (res.cigar[$ - 1].op == Ops.EQUAL)
{
if (res.result.score > cutoff)
if (res.score > cutoff)
{
auto clips = parse_clips(res.cigar);
if (clips[1].length != 0 || clips[0].length == 0)
return alignment;

status.art_left = true;
status.mate_left = false;
alignment.alignment = bam.target_names[rec.tid] ~ "," ~ (start + res.beg_ref)
alignment.alignment = bam.header.targetName(rec.tid).idup ~ "," ~ (start + res.position)
.to!string ~ "," ~ res.cigar.toString;
auto overlap = start + res.beg_ref >= rec.pos - clip_len
? start + res.beg_ref - (rec.pos - clip_len) : 0;
auto overlap = start + res.position >= rec.pos - clip_len
? start + res.position - (rec.pos - clip_len) : 0;
auto plen = (rec.length - clips[0].length) + (overlap);
plen = plen > rec.length ? rec.length : plen;
alignment.stem_loop = rec.sequence[0 .. plen].idup;
Expand All @@ -97,22 +95,22 @@ Align_Result align_clip(bool left)(SAMReader* bam, IndexedFastaFile* fai, Parasa
}
else
{
if (res.cigar.ops[0].op == Ops.EQUAL)
if (res.cigar[0].op == Ops.EQUAL)
{
if (res.result.score > cutoff)
if (res.score > cutoff)
{
auto clips = parse_clips(res.cigar);
if (clips[0].length != 0 || clips[1].length == 0)
return alignment;

status.art_right = true;
status.mate_right = false;
alignment.alignment = bam.target_names[rec.tid] ~ "," ~ (start + res.beg_ref)
alignment.alignment = bam.header.targetName(rec.tid).idup ~ "," ~ (start + res.position)
.to!string ~ "," ~ res.cigar.toString;
auto overlap = rec.pos + rec.cigar.ref_bases_covered + clip_len >= start
+ res.beg_ref + res.cigar.ref_bases_covered
? (rec.pos + rec.cigar.ref_bases_covered + clip_len) - (
start + res.beg_ref + res.cigar.ref_bases_covered) : 0;
auto overlap = rec.pos + rec.cigar.alignedLength + clip_len >= start
+ res.position + res.cigar.alignedLength
? (rec.pos + rec.cigar.alignedLength + clip_len) - (
start + res.position + res.cigar.alignedLength) : 0;
auto plen = (rec.length - clips[1].length) + (overlap);
plen = plen > rec.length ? rec.length : plen;
alignment.stem_loop = rec.sequence[$ - plen .. $].idup;
Expand Down Expand Up @@ -162,7 +160,7 @@ string self_align(bool left)(SAMReader* bam, string fai_f, Parasail* p,
// if(res.result.score>cutoff){
// status.art_left=true;
// status.mate_left=false;
align_string = rec.queryName.idup ~ "," ~ (res.beg_ref).to!string ~ "," ~ res
align_string = rec.queryName.idup ~ "," ~ (res.position).to!string ~ "," ~ res
.cigar.toString;
// }
// }
Expand All @@ -173,7 +171,7 @@ string self_align(bool left)(SAMReader* bam, string fai_f, Parasail* p,
// if(res.result.score>cutoff){
// status.art_right=true;
// status.mate_right=false;
align_string = rec.queryName.idup ~ "," ~ (res.beg_ref).to!string ~ "," ~ res
align_string = rec.queryName.idup ~ "," ~ (res.position).to!string ~ "," ~ res
.cigar.toString;
// }
// }
Expand Down
22 changes: 17 additions & 5 deletions source/anno.d
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,28 @@ import std.array : array;
import core.sync.mutex : Mutex;
import std.parallelism : parallel;
import dhtslib;
import htslib.hts_log;
import dparasail;
import readstatus;
import analysis;
import util;

void annotate(string[] args, ubyte con, int artifact_floor_length, int align_buffer_size)
void annotate(string cl,string[] args, ubyte con, int artifact_floor_length, int align_buffer_size)
{
hts_log_warning("[fade annotate]","Output SAM/BAM will not be sorted (regaurdless of prior sorting)");
auto bam = SAMReader(args[1]);
// auto fai=IndexedFastaFile(args[2]);
auto out_bam = getWriter(con, bam.header);
auto header = bam.header.dup;
header.addLine(
RecordType.PG,
"ID", "fade-annotate",
"PN", "fade",
"VN", VERSION,
"PP", header.valueByPos(RecordType.PG, header.numRecords(RecordType.PG) - 1, "ID"),
"CL", cl
);
auto out_bam = getWriter(con, header);

//0 Read is Softclipped
// sc
//1 Read has Supp Alignment
Expand All @@ -40,7 +52,7 @@ void annotate(string[] args, ubyte con, int artifact_floor_length, int align_buf
{
ReadStatus status;
if (rec.isSupplementary() || rec.isSecondary() || !rec.isMapped()
|| rec.cigar.ops.filter!(x => x.op == Ops.SOFT_CLIP).count() == 0)
|| rec.cigar[].filter!(x => x.op == Ops.SOFT_CLIP).count() == 0)
{
rec["rs"] = status.raw;
m.lock;
Expand All @@ -57,13 +69,13 @@ void annotate(string[] args, ubyte con, int artifact_floor_length, int align_buf
Align_Result align_1, align_2;
if (clips[0].length != 0)
{
align_1 = align_clip!true(&bam, &fai, &p, &rec, &status,
align_1 = align_clip!true(&bam, &fai, &p, rec, &status,
clips[0].length(), &m, artifact_floor_length, align_buffer_size);
}
//right soft-clip
if (clips[1].length() != 0)
{
align_2 = align_clip!false(&bam, &fai, &p, &rec, &status,
align_2 = align_clip!false(&bam, &fai, &p, rec, &status,
clips[1].length(), &m, artifact_floor_length, align_buffer_size);
}
// writeln(status.raw);
Expand Down
10 changes: 6 additions & 4 deletions source/app.d
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import std.stdio;
import std.getopt;
import std.parallelism : defaultPoolThreads;
import filter : filter;
import std.array : join;
import anno;
import stats;
import remap;
Expand All @@ -24,6 +25,7 @@ string full_help = "Fragmentase Artifact Detection and Elimination\n" ~ "usage:

void main(string[] args)
{
auto cl = join(args," ");
if (args.length == 1)
{
auto res = getopt(args, config.bundling);
Expand Down Expand Up @@ -60,7 +62,7 @@ void main(string[] args)
stderr.writeln("please use only one of the b or u flags");
return;
}
annotate(args[1 .. $], con, artifact_floor_length, align_buffer_size);
annotate(cl, args[1 .. $], con, artifact_floor_length, align_buffer_size);
}
else if (args[1] == "out")
{
Expand Down Expand Up @@ -89,9 +91,9 @@ void main(string[] args)
return;
}
if (clip)
filter!(true)(args[1 .. $], con);
filter!(true)(cl, args[1 .. $], con);
else
filter!(false)(args[1 .. $], con);
filter!(false)(cl, args[1 .. $], con);
}
else if (args[1] == "extract")
{
Expand All @@ -117,7 +119,7 @@ void main(string[] args)
stderr.writeln("please use only one of the b or u flags");
return;
}
remapArtifacts(args[1 .. $], con);
remapArtifacts(cl, args[1 .. $], con);
}
else if (args[1] == "stats")
{
Expand Down
48 changes: 29 additions & 19 deletions source/filter.d
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import util;

void clipRead(SAMRecord* rec, ReadStatus* status)
{
auto new_cigar = rec.cigar.ops.dup;
auto new_cigar = rec.cigar.dup;
auto qual = rec.qscores();
if (status.art_left)
{
Expand All @@ -22,13 +22,13 @@ void clipRead(SAMRecord* rec, ReadStatus* status)
.front.splitter(",").drop(2).front);

//assert left side is soft-clipped
if (art_cigar.ops[0].op != Ops.SOFT_CLIP)
if (art_cigar[0].op != Ops.SOFT_CLIP)
{
writeln((*rec)["am"]);
debug assert(false);
else return;
}
if (art_cigar.ref_bases_covered > rec.length)
if (art_cigar.alignedLength > rec.length)
{
writeln((*rec)["am"]);
debug assert(false);
Expand All @@ -39,13 +39,13 @@ void clipRead(SAMRecord* rec, ReadStatus* status)
new_cigar = new_cigar[1 .. $];
assert(new_cigar[0].op == Ops.SOFT_CLIP);

rec.b.core.pos += art_cigar.ref_bases_covered - new_cigar[0].length;
rec.b.core.pos += art_cigar.alignedLength - new_cigar[0].length;

//trim sequence
rec.sequence = rec.sequence[art_cigar.ref_bases_covered .. $];
rec.qscores(qual[art_cigar.ref_bases_covered .. $]);
rec.sequence = rec.sequence[art_cigar.alignedLength .. $];
rec.qscores(qual[art_cigar.alignedLength .. $]);

auto len_to_clip = art_cigar.ref_bases_covered;
auto len_to_clip = art_cigar.alignedLength;

while (len_to_clip > 0)
{
Expand All @@ -67,7 +67,7 @@ void clipRead(SAMRecord* rec, ReadStatus* status)
new_cigar = new_cigar[1 .. $];
}
}
new_cigar = CigarOp(art_cigar.ref_bases_covered, Ops.HARD_CLIP) ~ new_cigar;
new_cigar = CigarOp(art_cigar.alignedLength, Ops.HARD_CLIP) ~ new_cigar[];
}
if (status.art_right)
{
Expand All @@ -76,27 +76,27 @@ void clipRead(SAMRecord* rec, ReadStatus* status)
.drop(1).front.splitter(",").drop(2).front);

//assert right side is soft-clipped
if (art_cigar.ops[$ - 1].op != Ops.SOFT_CLIP)
if (art_cigar[$ - 1].op != Ops.SOFT_CLIP)
{
writeln((*rec)["am"]);
debug assert(false);
else return;
}
if (art_cigar.ref_bases_covered > rec.length)
if (art_cigar.alignedLength > rec.length)
{
writeln((*rec)["am"]);
debug assert(false);
else return;
}

//trim sequence
rec.sequence = rec.sequence[0 .. $ - art_cigar.ref_bases_covered];
rec.qscores(qual[0 .. $ - art_cigar.ref_bases_covered]);
rec.sequence = rec.sequence[0 .. $ - art_cigar.alignedLength];
rec.qscores(qual[0 .. $ - art_cigar.alignedLength]);

if (new_cigar[$ - 1].op == Ops.HARD_CLIP)
new_cigar = new_cigar[0 .. $ - 1];
assert(new_cigar[$ - 1].op == Ops.SOFT_CLIP);
auto len_to_clip = art_cigar.ref_bases_covered;
auto len_to_clip = art_cigar.alignedLength;

while (len_to_clip > 0)
{
Expand All @@ -118,15 +118,15 @@ void clipRead(SAMRecord* rec, ReadStatus* status)
new_cigar = new_cigar[0 .. $ - 1];
}
}
new_cigar = new_cigar ~ CigarOp(art_cigar.ref_bases_covered, Ops.HARD_CLIP);
new_cigar = new_cigar[] ~ CigarOp(art_cigar.alignedLength, Ops.HARD_CLIP);
}
rec.cigar = Cigar(new_cigar);
rec.cigar = new_cigar;
}

SAMRecord makeArtifactRecord(SAMRecord* original, bool left, bool mate)
{
auto rec = new SAMRecord(bam_dup1(original.b), original.h);
rec.sequence = reverse_complement_sam_record(&rec);
auto rec = SAMRecord(bam_dup1(original.b), original.h);
rec.sequence = reverse_complement_sam_record(rec);
// writeln(original.queryName);
// writeln(rec["am"].toString);
// rec.q_scores!false(cast(char[])(cast(ubyte[])((*original).qscores!false).retro.array));
Expand Down Expand Up @@ -160,10 +160,20 @@ SAMRecord makeArtifactRecord(SAMRecord* original, bool left, bool mate)
return rec;
}

void filter(bool clip)(string[] args, ubyte con)
void filter(bool clip)(string cl, string[] args, ubyte con)
{
auto bam = SAMReader(args[1]);
auto out_bam = getWriter(con, bam.header);
auto header = bam.header.dup;
header.addLine(
RecordType.PG,
"ID", "fade-annotate",
"PN", "fade",
"VN", VERSION,
"PP", header.valueByPos(RecordType.PG, header.numRecords(RecordType.PG) - 1, "ID"),
"CL", cl
);
auto out_bam = getWriter(con, header);

Stats stats;
static if (clip == true)
{
Expand Down
Loading

0 comments on commit 4a3e28d

Please sign in to comment.