Patch summary (free text before the first "diff --git" header).

run_grid_scan.py:
  * compare_hits: report a power of 1.0 when the expected set is empty
    instead of dividing by zero.
  * Run 20 repeats per (k, pi, indel_rate) grid point, and print one
    (lowest, mean, highest) row per filter mode via avg_range, which sorts
    the values and drops the first (lowest) one before summarising.
  * Show a tqdm progress bar and move the scan under main().

src/main.rs:
  * Deletions are clamped to the current number of sites so the drain range
    cannot run past the end of the sequence; `sites` is then reduced by the
    number of elements actually drained (end_del - mutated_pos), not by the
    requested `size`, so it stays in step with start_seq and weights.

NOTE(review): indentation was reconstructed from a whitespace-collapsed copy
of this patch; if context lines do not match, apply with
`git apply --ignore-whitespace` and confirm against the target files.

diff --git a/run_grid_scan.py b/run_grid_scan.py
index c9700c4..0b0afdb 100644
--- a/run_grid_scan.py
+++ b/run_grid_scan.py
@@ -1,5 +1,6 @@
 import subprocess
 import sys
+from tqdm import tqdm
 
 weed_cmd_rep = "ska weed -o no_ambig.skf --min-freq 1 --filter no-ambig-or-const test_grid.skf"
 weed_cmd_norep = "ska weed -o ambig.skf --min-freq 1 --filter no-const test_grid.skf"
@@ -31,25 +32,47 @@ def compare_hits(repeats: bool):
     for line in f:
         expected.add(str(line.rstrip().split("\t")[0:2]))
     f.close()
-    power = len(found.intersection(expected)) / len(expected)
+    if len(expected) > 0:
+        power = len(found.intersection(expected)) / len(expected)
+    else:
+        power = 1.0
     return power
 
-print(f"k\tpi\tindel_rate\tRepeats\tPower")
-for k in [17, 31, 63]:
-    for dist in [0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1]:
-        for indel_rate in [0.05, 0.1, 0.2]:
-            for repeat in range(10):
-                sim_cmd = f"ska_sim -k {k} -e {dist} -i {indel_rate} > seq.fa"
-                subprocess.run(sim_cmd, shell=True)
-                ska_cmd = f"ska build --single-strand -k {k} -o test_grid pneumo.fa seq.fa"
-                subprocess.run(ska_cmd, shell=True, stderr=subprocess.DEVNULL)
-
-                subprocess.run(weed_cmd_rep, shell=True, stderr=subprocess.DEVNULL)
-                overlap = compare_hits(True)
-                print(f"{k}\t{dist}\t{indel_rate}\t{repeat}\tNo ambiguity\t{overlap}")
-
-                subprocess.run(weed_cmd_norep, shell=True, stderr=subprocess.DEVNULL)
-                overlap = compare_hits(False)
-                print(f"{k}\t{dist}\t{indel_rate}\t{repeat}\tAllow ambiguity\t{overlap}")
-
-sys.exit(0)
\ No newline at end of file
+def avg_range(overlaps: list):
+    overlaps.sort()
+    overlaps = overlaps[1:20:]
+    avg = sum(overlaps) / len(overlaps)
+    top = max(overlaps)
+    bottom = min(overlaps)
+    return (bottom, avg, top)
+
+def main():
+    print(f"k\tpi\tindel_rate\tPower_b\tAverage power\tPower_t")
+    with tqdm(total=3*7*3*20) as pbar:
+        for k in [17, 31, 63]:
+            for dist in [0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.03, 0.1]:
+                for indel_rate in [0.0, 0.1, 0.2]:
+                    overlap_rep = list()
+                    overlap_norep = list()
+                    for _repeat in range(20):
+                        sim_cmd = f"ska_sim -k {k} -e {dist} -i {indel_rate} > seq.fa"
+                        subprocess.run(sim_cmd, shell=True)
+                        ska_cmd = f"ska build --single-strand -k {k} -o test_grid pneumo.fa seq.fa"
+                        subprocess.run(ska_cmd, shell=True, stderr=subprocess.DEVNULL)
+
+                        subprocess.run(weed_cmd_rep, shell=True, stderr=subprocess.DEVNULL)
+                        overlap_rep.append(compare_hits(True))
+                        subprocess.run(weed_cmd_norep, shell=True, stderr=subprocess.DEVNULL)
+                        overlap_norep.append(compare_hits(False))
+                        pbar.update(1)
+
+                    stats = avg_range(overlap_rep)
+                    print(f"{k}\t{dist}\t{indel_rate}\tNo ambiguity\t{stats[0]}\t{stats[1]}\t{stats[2]}")
+                    stats = avg_range(overlap_norep)
+                    print(f"{k}\t{dist}\t{indel_rate}\tAllow ambiguity\t{stats[0]}\t{stats[1]}\t{stats[2]}")
+
+
+    sys.exit(0)
+
+if __name__ == "__main__":
+    main()
diff --git a/src/main.rs b/src/main.rs
index 26bfd7d..58813e2 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -205,8 +205,6 @@ fn main() {
                 new_weights.append(&mut site_rates(size, &gamma, &mut rng));
                 weights.splice(mutated_pos..(mutated_pos + 1), new_weights);
             } else {
-                // Deletion
-                sites -= size;
                 /*
                 eprintln!(
                     "DEL {mutated_pos}:{}",
@@ -214,8 +212,11 @@ fn main() {
                         .unwrap()
                 );
                 */
-                start_seq.drain(mutated_pos..(mutated_pos + size));
-                weights.drain(mutated_pos..(mutated_pos + size));
+                let end_del = sites.min(mutated_pos + size);
+                start_seq.drain(mutated_pos..end_del);
+                weights.drain(mutated_pos..end_del);
+                // Deletion: shrink by the number of sites actually removed
+                sites -= end_del - mutated_pos;
             }
             // Need new gamma heterogeneity
             pos_dist = WeightedIndex::new(&weights).unwrap();