Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add more bigwigaverageoverbed stats #42

Merged
merged 7 commits into from
Jun 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions bigtools/resources/test/bwaob_intervals.bed
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
chr17 59800 60000 one
chr17 62000 64000 two
59 changes: 48 additions & 11 deletions bigtools/src/utils/cli/bigwigaverageoverbed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ pub struct BigWigAverageOverBedArgs {
#[arg(long)]
pub end: Option<u32>,

/// If set, include two additional columns containing the min and max observed in the area
#[arg(long)]
pub min_max: bool,

/// Set the number of threads to use. This tool will nearly always benefit from more cores.
/// Note: for parts of the runtime, the actual usage may be nthreads+1
#[arg(short = 't', long)]
Expand All @@ -62,6 +66,7 @@ pub fn bigwigaverageoverbed(
let bigwigpath = args.bigwig;
let bedinpath = args.bedin;
let bedoutpath = args.output;
let add_min_max = args.min_max;

let reopen = ReopenableFile {
path: bigwigpath.to_string(),
Expand Down Expand Up @@ -103,6 +108,7 @@ pub fn bigwigaverageoverbed(
end: u64,
bedinpath: String,
name: Name,
add_min_max: bool,
inbigwig: &mut BigWigRead<R>,
) -> Result<File, Box<dyn Error + Send + Sync>> {
let mut tmp = tempfile::tempfile()?;
Expand Down Expand Up @@ -130,10 +136,22 @@ pub fn bigwigaverageoverbed(
}
};

let stats = format!(
"{}\t{}\t{:.3}\t{:.3}\t{:.3}",
entry.size, entry.bases, entry.sum, entry.mean0, entry.mean
);
let stats = match add_min_max {
true => format!(
"{}\t{}\t{:.3}\t{:.3}\t{:.3}\t{:.3}\t{:.3}",
entry.size,
entry.bases,
entry.sum,
entry.mean0,
entry.mean,
entry.min,
entry.max
),
false => format!(
"{}\t{}\t{:.3}\t{:.3}\t{:.3}",
entry.size, entry.bases, entry.sum, entry.mean0, entry.mean
),
};
writeln!(&mut tmp, "{}\t{}", entry.name, stats)?
}

Expand Down Expand Up @@ -164,7 +182,8 @@ pub fn bigwigaverageoverbed(
Err(_) => break,
};

let result = process_chunk(start, end, bedinpath, name, &mut inbigwig);
let result =
process_chunk(start, end, bedinpath, name, add_min_max, &mut inbigwig);
result_sender.send(result).unwrap();
}
};
Expand Down Expand Up @@ -194,8 +213,14 @@ pub fn bigwigaverageoverbed(
}
};

let result =
process_chunk(start, chrom, bedinpath, name, &mut inbigwig);
let result = process_chunk(
start,
chrom,
bedinpath,
name,
add_min_max,
&mut inbigwig,
);
result_sender.send(result).unwrap();
}
}
Expand Down Expand Up @@ -239,10 +264,22 @@ pub fn bigwigaverageoverbed(
Err(e) => return Err(e.into()),
};

let stats = format!(
"{}\t{}\t{:.3}\t{:.3}\t{:.3}",
entry.size, entry.bases, entry.sum, entry.mean0, entry.mean
);
let stats = match add_min_max {
true => format!(
"{}\t{}\t{:.3}\t{:.3}\t{:.3}\t{:.3}\t{:.3}",
entry.size,
entry.bases,
entry.sum,
entry.mean0,
entry.mean,
entry.min,
entry.max
),
false => format!(
"{}\t{}\t{:.3}\t{:.3}\t{:.3}",
entry.size, entry.bases, entry.sum, entry.mean0, entry.mean
),
};
writeln!(&mut bedoutwriter, "{}\t{}", entry.name, stats)?
}
}
Expand Down
17 changes: 14 additions & 3 deletions bigtools/src/utils/misc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ pub struct BigWigAverageOverBedEntry {
pub sum: f64,
pub mean0: f64,
pub mean: f64,
pub min: f64,
pub max: f64,
}

#[derive(Error, Debug)]
Expand All @@ -32,6 +34,9 @@ pub enum StatsError {
InvalidNameCol(String),
}

/// Returns a `BigWigAverageOverBedEntry` for a bigWig over a given interval.
/// If there are no values for the given region, then `f64::NAN` is given for
/// `mean`, `min`, and `max`, and `0` is given for `mean0`.
pub fn stats_for_bed_item<R: BBIFileRead>(
name: Name,
chrom: &str,
Expand All @@ -51,17 +56,21 @@ pub fn stats_for_bed_item<R: BBIFileRead>(

let mut bases = 0;
let mut sum = 0.0;
let mut min = f64::MAX;
let mut max = f64::MIN;
for val in interval {
let num_bases = val.end - val.start;
bases += num_bases;
sum += f64::from(num_bases) * f64::from(val.value);
min = min.min(f64::from(val.value));
max = max.max(f64::from(val.value));
}
let size = end - start;
let mean0 = sum / f64::from(size);
let mean = if bases == 0 {
0.0
let (mean, min, max) = if bases == 0 {
(f64::NAN, f64::NAN, f64::NAN)
} else {
sum / f64::from(bases)
(sum / f64::from(bases), min, max)
};

let name = match name {
Expand Down Expand Up @@ -89,6 +98,8 @@ pub fn stats_for_bed_item<R: BBIFileRead>(
sum,
mean0,
mean,
min,
max,
})
}

Expand Down
Loading
Loading