Skip to content

Commit

Permalink
Merge branch 'trim-reads' into 'master'
Browse files Browse the repository at this point in the history
read trimming

See merge request machine-learning/bonito!29
  • Loading branch information
iiSeymour committed Apr 19, 2021
2 parents 9abea07 + f3a3af2 commit 9739d6b
Showing 1 changed file with 28 additions and 5 deletions.
33 changes: 28 additions & 5 deletions bonito/fast5.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,14 @@ def __init__(self, read, filename):
self.start = read_attrs['start_time'] / self.sampling_rate
self.duration = read_attrs['duration'] / self.sampling_rate

# no trimming
self.template_start = self.start
self.template_duration = self.duration

raw = read.handle[read.raw_dataset_name][:]
scaled = np.array(self.scaling * (raw + self.offset), dtype=np.float32)

trim_start, _ = trim(scaled[:8000])
scaled = scaled[trim_start:]
self.template_start = self.start + (1 / self.sampling_rate) * trim_start
self.template_duration = self.duration + (1 / self.sampling_rate) * trim_start

if len(scaled) > 8000:
med, mad = med_mad(scaled)
self.signal = (scaled - med) / mad
Expand Down Expand Up @@ -69,6 +70,28 @@ def __repr__(self):
return "ReadChunk('%s')" % self.read_id


def trim(signal, window_size=40, threshold_factor=2.4, min_elements=3):
    """
    Locate a trim point near the start of `signal`.

    The first 10 samples are always dropped. A threshold of
    `med + mad * threshold_factor` is derived from `med_mad` over the last
    `window_size * 100` remaining samples, and consecutive non-overlapping
    windows of `window_size` samples are then scanned in order. The scan
    stops at the first window that has more than `min_elements` samples
    above the threshold and whose final sample is not above it.

    Returns a `(trim_start, length)` tuple. `trim_start` is the end of the
    matching window shifted by the 10 dropped samples and capped at
    `length`, or 10 when no window matches. `length` is the number of
    samples left after the initial 10 were dropped.
    """
    skipped = 10
    signal = signal[skipped:]
    remaining = len(signal)

    # The threshold is estimated from the tail of the remaining signal.
    med, mad = med_mad(signal[-(window_size * 100):])
    cutoff = med + mad * threshold_factor

    for index in range(remaining // window_size):
        stop = (index + 1) * window_size
        above = signal[stop - window_size:stop] > cutoff
        # Skip windows with too few high samples, and windows that still
        # end above the threshold.
        if above.sum() <= min_elements or above[-1]:
            continue
        return min(stop + skipped, remaining), remaining

    return skipped, remaining


def med_mad(x, factor=1.4826):
"""
Calculate signal median and median absolute deviation
Expand Down Expand Up @@ -164,4 +187,4 @@ def get_reads(directory, read_ids=None, skip=False, max_read_size=0, n_proc=1, r
yield read

if cancel is not None and cancel.is_set():
return
return

0 comments on commit 9739d6b

Please sign in to comment.