Skip to content

Commit

Permalink
new split into blocks functions
Browse files Browse the repository at this point in the history
  • Loading branch information
peter-urban committed Nov 22, 2024
1 parent 8a01970 commit a6ef5dc
Show file tree
Hide file tree
Showing 8 changed files with 238 additions and 3 deletions.
1 change: 0 additions & 1 deletion meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ project(
'themachinethatgoesping_pingprocessing',
'cpp',
license: 'MPL-2.0',

version: '0.9.1',
default_options: ['warning_level=2', 'buildtype=release', 'cpp_std=c++20'],
meson_version: '>=1.3.2' #first version with clang-cl openmp support
Expand Down
3 changes: 3 additions & 0 deletions python/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,16 @@ sources = [
'themachinethatgoesping/pingprocessing/filter_pings/by_files.py',
'themachinethatgoesping/pingprocessing/filter_pings/by_region.py',
'themachinethatgoesping/pingprocessing/filter_pings/by_time.py',
'themachinethatgoesping/pingprocessing/filter_pings/by_time_list.py',
'themachinethatgoesping/pingprocessing/split_pings/__init__.py',
'themachinethatgoesping/pingprocessing/split_pings/by_channel_id.py',
'themachinethatgoesping/pingprocessing/split_pings/by_distance.py',
'themachinethatgoesping/pingprocessing/split_pings/by_file.py',
'themachinethatgoesping/pingprocessing/split_pings/by_function_return.py',
'themachinethatgoesping/pingprocessing/split_pings/by_time.py',
'themachinethatgoesping/pingprocessing/split_pings/by_time_blocks.py',
'themachinethatgoesping/pingprocessing/split_pings/into_ping_blocks.py',
'themachinethatgoesping/pingprocessing/split_pings/into_time_blocks.py',
'themachinethatgoesping/pingprocessing/watercolumn/__init__.py',
'themachinethatgoesping/pingprocessing/watercolumn/echograms/__init__.py',
'themachinethatgoesping/pingprocessing/watercolumn/echograms/echodata.py',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@
from .by_time import by_time
from .by_region import by_latlon
from .by_features import by_features
from .by_files import by_folders, by_files
from .by_files import by_folders, by_files
from .by_time_list import by_time_list, by_ping_times
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@

import numpy as np

from typing import List
import numpy as np

from themachinethatgoesping.pingprocessing.core.progress import get_progress_iterator
from themachinethatgoesping.echosounders import filetemplates
I_Ping = filetemplates.I_Ping


def by_time_list(
    pings: List[I_Ping],
    times: List[float],
    max_time_diff_seconds: float = 10,
    progress: bool = False) -> List[I_Ping]:
    """
    Select the pings that lie close to one of the given timestamps.

    Both lists are walked once, front to back, so the matching only works
    as described when ``pings`` and ``times`` are in ascending time order
    (assumed, not checked). Every entry of ``times`` is consumed by at most
    one ping: the first ping found within the tolerance, which is not
    necessarily the closest one.

    Parameters
    ----------
    pings : List[I_Ping]
        List of ping objects to be filtered.
    times : List[float]
        Reference timestamps, in the same unit as ``ping.get_timestamp()``.
    max_time_diff_seconds : float
        Maximum absolute difference between a ping timestamp and the
        reference timestamp it is matched with.
    progress : bool, optional
        If True, show a progress bar, by default False.

    Returns
    -------
    List[I_Ping]
        The matched pings, in their original order.
    """
    ping_iterator = get_progress_iterator(pings, progress, desc = "Filter pings by time")

    n_times = len(times)
    selected = []
    cursor = 0  # index of the next reference time that has not been matched yet

    for candidate in ping_iterator:
        stamp = candidate.get_timestamp()

        # drop reference times that are already too far behind this ping
        while cursor < n_times and stamp - times[cursor] > max_time_diff_seconds:
            cursor += 1

        # no reference times left: none of the remaining pings can match
        if cursor >= n_times:
            break

        # ping is too far ahead of the current reference time: try the next ping
        if abs(stamp - times[cursor]) > max_time_diff_seconds:
            continue

        selected.append(candidate)
        cursor += 1  # this reference time is used up

    return selected

def by_ping_times(
    pings: List[I_Ping],
    reference_pings: List[I_Ping],
    max_time_diff_seconds: float = 10,
    progress: bool = False) -> List[I_Ping]:
    """
    Filter pings by the timestamps of a list of reference pings.

    Both lists are walked once, front to back, so the matching only works
    as described when ``pings`` and ``reference_pings`` are in ascending
    time order (assumed, not checked). Every reference ping is consumed by
    at most one ping: the first ping found within the tolerance, which is
    not necessarily the closest one.

    Parameters
    ----------
    pings : List[I_Ping]
        List of ping objects to be filtered.
    reference_pings : List[I_Ping]
        List of reference ping objects; only their ``get_timestamp()`` is used.
    max_time_diff_seconds : float
        Maximum absolute difference between the timestamp of a valid ping
        and the timestamp of the reference ping it is matched with.
    progress : bool, optional
        If True, show a progress bar, by default False.

    Returns
    -------
    List[I_Ping]
        The matched pings (taken from ``pings``), in their original order.
    """
    # Get progress iterator
    it = get_progress_iterator(pings, progress, desc = "Filter pings by time")

    # Filter pings by timestamps
    filtered_pings = []
    reference_pings_index = 0
    n_reference_pings = len(reference_pings)

    for ping in it:
        t = ping.get_timestamp()

        # skip reference pings that are already too far behind this ping
        while (reference_pings_index < n_reference_pings
               and t - reference_pings[reference_pings_index].get_timestamp() > max_time_diff_seconds):
            reference_pings_index += 1

        # all reference pings are used up: no later ping can match
        if reference_pings_index >= n_reference_pings:
            break

        if abs(t - reference_pings[reference_pings_index].get_timestamp()) <= max_time_diff_seconds:
            filtered_pings.append(ping)
            # each reference ping is matched only once
            reference_pings_index += 1

    return filtered_pings
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,6 @@
from .by_channel_id import by_channel_id
from .by_file import by_file_nr, by_file_path
from .by_function_return import by_function_return
from .by_time_blocks import by_time_blocks
from .by_time_blocks import by_time_blocks
from .into_time_blocks import into_time_blocks
from .into_ping_blocks import into_ping_blocks
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@

from collections import defaultdict
import numpy as np

import themachinethatgoesping as theping
from themachinethatgoesping.pingprocessing.core.progress import get_progress_iterator
from themachinethatgoesping.echosounders import filetemplates
I_Ping = filetemplates.I_Ping

from typing import List, Dict, Union
from collections import defaultdict
import numpy as np
from pytimeparse2 import parse as timeparse
import dateutil

def into_ping_blocks(
    pings: List[I_Ping],
    block_size: int,
    max_ping_time_difference=None,
    overlap0: int = 0,
    overlap1: int = 0,
    progress: bool = False) -> Dict[int, List[I_Ping]]:
    """
    Splits a list of pings into blocks of a specified size, with optional overlaps and time difference constraints.

    The pings are first grouped into time blocks (see max_ping_time_difference); each time block is
    then cut into slices of block_size pings. Overlaps never reach across a time block border.

    Args:
        pings (List[I_Ping]): List of pings to be split into blocks.
        block_size (int): The number of pings per block (without overlap). Must be >= 1.
        max_ping_time_difference (optional): Passed on to split_pings.by_time_difference to group
            the pings into time blocks before slicing.
            If None, all pings are considered to be in the same time block.
        overlap0 (int, optional): Number of pings from before the block start that are added to each block. Defaults to 0.
        overlap1 (int, optional): Number of pings from after the block end that are added to each block. Defaults to 0.
        progress (bool, optional): If True, displays a progress bar (one step per time block). Defaults to False.

    Returns:
        Dict[int, List[I_Ping]]: A dictionary where keys are block numbers and values are lists of pings in each block.
            Block numbers increase monotonically; one number is skipped after every time block.

    Raises:
        ValueError: If block_size is smaller than 1.
    """

    # a negative block_size would silently produce no blocks, zero would fail inside range()
    if block_size < 1:
        raise ValueError(f"block_size must be >= 1, got {block_size}")

    if max_ping_time_difference is None:
        ping_time_blocks = {0 : pings}
    else:
        ping_time_blocks = theping.pingprocessing.split_pings.by_time_difference(pings,max_ping_time_difference)

    # the progress bar follows the loop that is actually executed (one step per time block)
    it = get_progress_iterator(list(ping_time_blocks.values()), progress, desc = "Split pings into ping blocks")

    split_pings = defaultdict(list)
    block_nr = 0

    for pingblock in it:
        n_pings = len(pingblock)

        for i in range(0, n_pings, block_size):
            # extend the slice by the overlaps, clamped to the borders of this time block
            i0 = max(i - overlap0, 0)
            i1 = min(i + block_size + overlap1, n_pings)

            # empty slice (only possible with negative overlaps): nothing more to emit here
            if i1 == i0:
                break

            split_pings[block_nr] = pingblock[i0:i1]
            block_nr += 1

        # new block for next pingblock
        # NOTE(review): block_nr was already advanced after the last slice, so this leaves a gap
        # in the block numbers between time blocks; kept so that existing keys do not change.
        block_nr += 1

    return split_pings


Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@

from collections import defaultdict
import numpy as np

from themachinethatgoesping.pingprocessing.core.progress import get_progress_iterator
from themachinethatgoesping.echosounders import filetemplates
I_Ping = filetemplates.I_Ping

from typing import List, Dict, Union
from collections import defaultdict
import numpy as np
from pytimeparse2 import parse as timeparse
import dateutil
import datetime

def _parse_time_span(value, name):
    """Returns value as timedelta/relativedelta; strings and numbers are parsed with pytimeparse2."""
    # imported explicitly: a bare 'import dateutil' does not guarantee that the
    # 'relativedelta' submodule is loaded
    from dateutil.relativedelta import relativedelta

    if isinstance(value, (datetime.timedelta, relativedelta)):
        return value

    parsed = timeparse(value, as_timedelta=True)

    # presumably timeparse returns None for input it cannot interpret (verify against
    # the pytimeparse2 docs); fail here instead of in the datetime arithmetic later on
    if parsed is None:
        raise ValueError(f"{name}: cannot interpret {value!r} as a time span")

    return parsed


def into_time_blocks(
    pings: List[I_Ping],
    timeblock_size: Union[datetime.timedelta, str, float, int],
    overlap0: Union[datetime.timedelta, str, float, int] = 0,
    overlap1: Union[datetime.timedelta, str, float, int] = 0,
    full_hour_base: bool = True,
    progress: bool = False) -> Dict[datetime.datetime, List[I_Ping]]:
    """
    Splits a list of pings into time blocks.

    The pings are walked once, front to back, and the current block only ever moves forward in time,
    so the pings are assumed to be sorted by time (not checked). Every ping is put into exactly one block.

    NOTE(review): overlap0 / overlap1 only shift the block key and the block end; pings inside an
    overlap region are not duplicated into the neighbouring block. Confirm that this is intended.

    Args:
        pings (List[I_Ping]): List of pings to be split.
        timeblock_size (Union[datetime.timedelta, str, float, int]): Size of each time block. Can be a timedelta,
            a relativedelta or anything pytimeparse2 can parse. Must be a positive time span.
        overlap0 (Union[datetime.timedelta, str, float, int], optional): Time subtracted from the block start that is used as key. Defaults to 0.
        overlap1 (Union[datetime.timedelta, str, float, int], optional): Time added to the end of each block. Defaults to 0.
        full_hour_base (bool, optional): If True, the first time block will start at the beginning of the hour
            of the first ping. Otherwise it starts at the time of the first ping. Defaults to True.
        progress (bool, optional): If True, displays a progress bar. Defaults to False.

    Returns:
        Dict[datetime.datetime, List[I_Ping]]: Dictionary where keys are the start times of the blocks
            (minus overlap0) and values are lists of pings in those blocks. Empty if pings is empty.

    Raises:
        ValueError: If a time span cannot be parsed or if timeblock_size is not positive.
    """

    timeblock_size = _parse_time_span(timeblock_size, "timeblock_size")
    overlap0 = _parse_time_span(overlap0, "overlap0")
    overlap1 = _parse_time_span(overlap1, "overlap1")

    split_pings = defaultdict(list)

    # nothing to split (pings[0] below would raise IndexError)
    if len(pings) == 0:
        return split_pings

    first_time = pings[0].get_datetime()
    if full_hour_base:
        first_time = first_time.replace(minute=0, second=0, microsecond=0)

    # a zero or negative block size would never advance the block window below (endless loop);
    # checked via addition because relativedelta objects cannot be compared with '<='
    if first_time + timeblock_size <= first_time:
        raise ValueError("timeblock_size must be a positive time span")

    max_time = first_time + timeblock_size + overlap1
    first_time -= overlap0

    it = get_progress_iterator(pings, progress, desc = "Spliting pings into ping time blocks")

    for ping in it:
        dt = ping.get_datetime()

        # advance the block window until it contains this ping
        while dt > max_time:
            first_time += timeblock_size
            max_time += timeblock_size

        split_pings[first_time].append(ping)

    return split_pings


1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ tqdm
matplotlib
rasterio >= 1.3.10
ipywidgets
pytimeparse2 >= 1.7.1

0 comments on commit a6ef5dc

Please sign in to comment.