Skip to content

Commit

Permalink
Fix an edge case in deletions
Browse files Browse the repository at this point in the history
  • Loading branch information
johnlees committed Feb 16, 2024
1 parent aa358f9 commit 37dd92f
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 24 deletions.
63 changes: 43 additions & 20 deletions run_grid_scan.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import subprocess
import sys
from tqdm import tqdm

weed_cmd_rep = "ska weed -o no_ambig.skf --min-freq 1 --filter no-ambig-or-const test_grid.skf"
weed_cmd_norep = "ska weed -o ambig.skf --min-freq 1 --filter no-const test_grid.skf"
Expand Down Expand Up @@ -31,25 +32,47 @@ def compare_hits(repeats: bool):
for line in f:
expected.add(str(line.rstrip().split("\t")[0:2]))
f.close()
power = len(found.intersection(expected)) / len(expected)
if len(expected) > 0:
power = len(found.intersection(expected)) / len(expected)
else:
power = 1.0
return power

print(f"k\tpi\tindel_rate\tRepeats\tPower")
for k in [17, 31, 63]:
for dist in [0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1]:
for indel_rate in [0.05, 0.1, 0.2]:
for repeat in range(10):
sim_cmd = f"ska_sim -k {k} -e {dist} -i {indel_rate} > seq.fa"
subprocess.run(sim_cmd, shell=True)
ska_cmd = f"ska build --single-strand -k {k} -o test_grid pneumo.fa seq.fa"
subprocess.run(ska_cmd, shell=True, stderr=subprocess.DEVNULL)

subprocess.run(weed_cmd_rep, shell=True, stderr=subprocess.DEVNULL)
overlap = compare_hits(True)
print(f"{k}\t{dist}\t{indel_rate}\t{repeat}\tNo ambiguity\t{overlap}")

subprocess.run(weed_cmd_norep, shell=True, stderr=subprocess.DEVNULL)
overlap = compare_hits(False)
print(f"{k}\t{dist}\t{indel_rate}\t{repeat}\tAllow ambiguity\t{overlap}")

sys.exit(0)
def avg_range(overlaps: list):
    """Summarise repeated power measurements as ``(bottom, avg, top)``.

    The values are sorted, the single smallest value is discarded and at
    most the next 19 values are kept (the ``[1:20]`` slice). The minimum,
    arithmetic mean and maximum of what remains are returned.

    Args:
        overlaps: Numeric power values, one per repeat. The list is not
            modified.

    Returns:
        Tuple ``(bottom, avg, top)`` of the retained values.

    Raises:
        ZeroDivisionError: If ``overlaps`` is empty.
    """
    # Sort a copy so the caller's list keeps its original order.
    ordered = sorted(overlaps)
    # NOTE(review): with 20 repeats this drops only the lowest value; confirm
    # whether a symmetric trim (e.g. [1:-1]) was intended.
    kept = ordered[1:20]
    if not kept:
        # Fewer than two values: trimming would leave nothing to summarise,
        # so fall back to the untrimmed data.
        kept = ordered
    avg = sum(kept) / len(kept)
    # `kept` is sorted, so its ends are the minimum and maximum.
    return (kept[0], avg, kept[-1])

def main():
    """Run the simulation grid and print a power summary per parameter set.

    For every combination of k-mer size, substitution rate and indel rate,
    the simulate / build / weed pipeline is run ``n_repeats`` times. After
    each weed command the power returned by ``compare_hits`` is recorded.
    Two tab-separated rows are then printed to stdout per combination, one
    for each weed filter, holding the values returned by ``avg_range``.

    Exits the process with status 0 when the grid is complete.
    """
    k_values = [17, 31, 63]
    dists = [0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.03, 0.1]
    indel_rates = [0.0, 0.1, 0.2]
    n_repeats = 20

    # The header names every field of the data rows, including the
    # ambiguity label printed between indel_rate and the power columns.
    print("k\tpi\tindel_rate\tAmbiguity\tPower_b\tAverage power\tPower_t")

    # Derive the progress total from the grid so it cannot drift from the
    # loops below when a parameter list is edited.
    total_runs = len(k_values) * len(dists) * len(indel_rates) * n_repeats
    with tqdm(total=total_runs) as pbar:
        for k in k_values:
            for dist in dists:
                for indel_rate in indel_rates:
                    overlap_rep = list()
                    overlap_norep = list()
                    for _repeat in range(n_repeats):
                        # Simulate a sequence; the shell redirect writes it to seq.fa.
                        sim_cmd = f"ska_sim -k {k} -e {dist} -i {indel_rate} > seq.fa"
                        subprocess.run(sim_cmd, shell=True)
                        # Build test_grid from the reference and the simulated sequence.
                        ska_cmd = f"ska build --single-strand -k {k} -o test_grid pneumo.fa seq.fa"
                        subprocess.run(ska_cmd, shell=True, stderr=subprocess.DEVNULL)

                        # Score both weed filters against the same build.
                        subprocess.run(weed_cmd_rep, shell=True, stderr=subprocess.DEVNULL)
                        overlap_rep.append(compare_hits(True))
                        subprocess.run(weed_cmd_norep, shell=True, stderr=subprocess.DEVNULL)
                        overlap_norep.append(compare_hits(False))
                        pbar.update(1)

                    stats = avg_range(overlap_rep)
                    print(f"{k}\t{dist}\t{indel_rate}\tNo ambiguity\t{stats[0]}\t{stats[1]}\t{stats[2]}")
                    stats = avg_range(overlap_norep)
                    print(f"{k}\t{dist}\t{indel_rate}\tAllow ambiguity\t{stats[0]}\t{stats[1]}\t{stats[2]}")

    sys.exit(0)


if __name__ == "__main__":
    main()
9 changes: 5 additions & 4 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -205,17 +205,18 @@ fn main() {
new_weights.append(&mut site_rates(size, &gamma, &mut rng));
weights.splice(mutated_pos..(mutated_pos + 1), new_weights);
} else {
// Deletion
sites -= size;
/*
eprintln!(
"DEL {mutated_pos}:{}",
String::from_utf8(start_seq[mutated_pos..(mutated_pos + size)].to_vec())
.unwrap()
);
*/
start_seq.drain(mutated_pos..(mutated_pos + size));
weights.drain(mutated_pos..(mutated_pos + size));
let end_del = sites.min(mutated_pos + size);
start_seq.drain(mutated_pos..end_del);
weights.drain(mutated_pos..end_del);
// Deletion
sites -= size;
}
// Need new gamma heterogeneity
pos_dist = WeightedIndex::new(&weights).unwrap();
Expand Down

0 comments on commit 37dd92f

Please sign in to comment.