Skip to content

Commit

Permalink
Merge pull request #550 from OpenTrafficCam/bug/5669-replace-types-all-in-mypy-in-pre-commit-configyaml
Browse files Browse the repository at this point in the history

Bug/5669 replace types all in mypy in pre commit configyaml
  • Loading branch information
briemla authored Sep 13, 2024
2 parents 7dec8a2 + f6b3187 commit 7eb5958
Show file tree
Hide file tree
Showing 19 changed files with 291 additions and 512 deletions.
43 changes: 32 additions & 11 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,45 +1,66 @@
---
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.3.0
rev: v4.6.0
hooks:
- id: check-yaml
- id: check-json
- id: end-of-file-fixer
exclude_types: [json]
exclude_types:
- json
- id: trailing-whitespace
- id: no-commit-to-branch
- id: debug-statements
- id: requirements-txt-fixer
- id: check-executables-have-shebangs
- id: detect-private-key
- repo: local
hooks:
- id: update-type-stubs
name: Check for Type Stubs and Update Config
entry: ./update_precommit.py
language: system
files: ^requirements.*\.txt$
stages:
- commit
- repo: https://github.com/PyCQA/flake8
rev: 6.1.0
rev: 7.1.1
hooks:
- id: flake8
- repo: https://github.com/pycqa/isort
rev: 5.12.0
rev: 5.13.2
hooks:
- id: isort
args: ["--profile", "black"]
args:
- --profile
- black
- repo: https://github.com/psf/black
rev: 24.2.0
rev: 24.8.0
hooks:
- id: black
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.6.1
rev: v1.11.1
hooks:
- id: mypy
entry: mypy .
additional_dependencies: [types-all, pydantic]
entry: mypy OTAnalytics tests
additional_dependencies:
- types-PyYAML
- types-flake8
- types-openpyxl
- types-pillow
- types-seaborn
- types-shapely
- types-tqdm
- types-ujson
always_run: true
pass_filenames: false
- repo: https://github.com/adrienverge/yamllint.git
rev: v1.35.1
hooks:
- id: yamllint
args: [-c=./.yamllint.yaml]
args:
- -c=./.yamllint.yaml
- repo: https://github.com/koalaman/shellcheck-precommit
rev: v0.9.0
rev: v0.10.0
hooks:
- id: shellcheck
4 changes: 2 additions & 2 deletions OTAnalytics/domain/track_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,13 +360,13 @@ def remove(self, ids: Sequence[str]) -> "TrackGeometryDataset":
raise NotImplementedError

@abstractmethod
def get_for(self, track_ids: Iterable[str]) -> "TrackGeometryDataset":
def get_for(self, track_ids: list[str]) -> "TrackGeometryDataset":
"""Get geometries for given track ids if they exist.
Ids that do not exist will not be included in the dataset.
Args:
track_ids (Iterable[str]): the track ids.
track_ids (list[str]): the track ids.
Returns:
TrackGeometryDataset: the dataset with tracks.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from itertools import chain
from typing import Any, Iterable, Literal, Sequence

from pandas import DataFrame, concat
from pandas import DataFrame, Series, concat
from pygeos import (
Geometry,
contains,
Expand Down Expand Up @@ -194,9 +194,7 @@ def __create_entries_from_dataframe(
new_y = filtered_tracks[track.Y] + offset.y * filtered_tracks[track.H]
tracks = concat([new_x, new_y], keys=[track.X, track.Y], axis=1)
tracks_by_id = tracks.groupby(level=LEVEL_TRACK_ID, group_keys=True)
geometries = tracks_by_id.apply(
lambda coords: linestrings(coords[track.X], coords[track.Y])
)
geometries = tracks_by_id.apply(convert_to_linestrings)
projections = calculate_all_projections(tracks)

result = concat([geometries, projections], keys=COLUMNS, axis=1)
Expand Down Expand Up @@ -230,7 +228,7 @@ def remove(self, ids: Sequence[str]) -> TrackGeometryDataset:
updated = self._dataset.drop(index=ids, errors="ignore")
return PygeosTrackGeometryDataset(self._offset, updated)

def get_for(self, track_ids: Iterable[str]) -> "TrackGeometryDataset":
def get_for(self, track_ids: list[str]) -> "TrackGeometryDataset":
_ids = self._dataset.index.intersection(track_ids)

filtered_df = self._dataset.loc[_ids]
Expand Down Expand Up @@ -339,7 +337,7 @@ def __eq__(self, other: Any) -> bool:
)


def calculate_all_projections(tracks: DataFrame) -> DataFrame:
def calculate_all_projections(tracks: DataFrame) -> Series:
tracks_by_id = tracks.groupby(level=0, group_keys=True)
tracks["last_x"] = tracks_by_id[track.X].shift(1)
tracks["last_y"] = tracks_by_id[track.Y].shift(1)
Expand All @@ -354,3 +352,7 @@ def calculate_all_projections(tracks: DataFrame) -> DataFrame:
"distance"
].cumsum()
return tracks.groupby(level=0, group_keys=True)["cum-distance"].agg(list)


def convert_to_linestrings(coords: DataFrame) -> Geometry:
    """Build a pygeos linestring from a single track's coordinate frame.

    Args:
        coords (DataFrame): coordinates of one track; must contain the
            ``track.X`` and ``track.Y`` columns.

    Returns:
        Geometry: linestring connecting the track's points in row order.
    """
    xs = coords[track.X]
    ys = coords[track.Y]
    return linestrings(xs, ys)
103 changes: 56 additions & 47 deletions OTAnalytics/plugin_datastore/track_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import numpy
import pandas
from more_itertools import batched
from pandas import DataFrame, MultiIndex, Series
from pandas import DataFrame, Index, MultiIndex, Series

from OTAnalytics.application.logger import logger
from OTAnalytics.domain import track
Expand Down Expand Up @@ -144,7 +144,7 @@ class PandasTrackClassificationCalculator(ABC):
"""

@abstractmethod
def calculate(self, detections: DataFrame) -> Series:
def calculate(self, detections: DataFrame) -> DataFrame:
"""Determine a track's classification.
Args:
Expand Down Expand Up @@ -278,11 +278,11 @@ def __iter__(self) -> Iterator[Track]:
yield from self.as_generator()

def as_generator(self) -> Generator[Track, None, None]:
if self._dataset.empty:
if (track_ids := self.get_index()) is None:
yield from []
track_ids = self.get_track_ids_as_string()
for current in track_ids:
yield self.__create_track_flyweight(current)
else:
for current in track_ids.array:
yield self._create_track_flyweight(current)

@staticmethod
def from_list(
Expand Down Expand Up @@ -354,7 +354,7 @@ def get_for(self, id: TrackId) -> Optional[Track]:
if self._dataset.empty:
return None
try:
return self.__create_track_flyweight(id.id)
return self._create_track_flyweight(id.id)
except KeyError:
return None

Expand Down Expand Up @@ -387,18 +387,22 @@ def _remove_from_geometry_dataset(
return updated_dataset

def as_list(self) -> list[Track]:
if self._dataset.empty:
return []
track_ids = self.get_track_ids_as_string()
logger().warning(
"Creating track flyweight objects which is really slow in "
f"'{PandasTrackDataset.as_list.__name__}'."
)
return [self.__create_track_flyweight(current) for current in track_ids]
if (track_ids := self.get_index()) is not None:
logger().warning(
"Creating track flyweight objects which is really slow in "
f"'{PandasTrackDataset.as_list.__name__}'."
)
return [self._create_track_flyweight(current) for current in track_ids]

def __create_track_flyweight(self, track_id: str) -> Track:
return []

def _create_track_flyweight(self, track_id: str) -> Track:
track_frame = self._dataset.loc[track_id, :]
return PandasTrack(track_id, track_frame)
if isinstance(track_frame, DataFrame):
return PandasTrack(track_id, track_frame)
if isinstance(track_frame, Series):
return PandasTrack(track_id, track_frame.to_frame(track_id))
raise NotImplementedError(f"Not implemented for {type(track_frame)}")

def get_data(self) -> DataFrame:
return self._dataset
Expand All @@ -407,27 +411,31 @@ def split(self, batches: int) -> Sequence["PandasTrackDataset"]:
dataset_size = len(self)
batch_size = ceil(dataset_size / batches)

new_batches = []
for batch_ids in batched(self.get_track_ids_as_string(), batch_size):
batch_dataset = self._dataset.loc[list(batch_ids), :]
batch_geometries = self._get_geometries_for(batch_ids)
new_batches.append(
PandasTrackDataset.from_dataframe(
batch_dataset,
self.track_geometry_factory,
batch_geometries,
calculator=self.calculator,
if (track_ids := self.get_index()) is not None:
new_batches = []
for batch_ids in batched(track_ids, batch_size):
batch_ids_as_list = list(batch_ids)
batch_dataset = self._dataset.loc[batch_ids_as_list, :]
batch_geometries = self._get_geometries_for(batch_ids_as_list)
new_batches.append(
PandasTrackDataset.from_dataframe(
batch_dataset,
self.track_geometry_factory,
batch_geometries,
calculator=self.calculator,
)
)
)
return new_batches
return new_batches

def get_track_ids_as_string(self) -> Sequence[str]:
return [self]

def get_index(self) -> Index | None:
if self._dataset.empty:
return []
return None
return self._dataset.index.get_level_values(LEVEL_TRACK_ID).unique()

def _get_geometries_for(
self, track_ids: Iterable[str]
self, track_ids: list[str]
) -> dict[RelativeOffsetCoordinate, TrackGeometryDataset]:
geometry_datasets = {}
for offset, geometry_dataset in self._geometry_datasets.items():
Expand All @@ -440,7 +448,9 @@ def __len__(self) -> int:
return len(self._dataset.index.get_level_values(LEVEL_TRACK_ID).unique())

def filter_by_min_detection_length(self, length: int) -> "PandasTrackDataset":
detection_counts_per_track = self._dataset.groupby(level=LEVEL_TRACK_ID).size()
detection_counts_per_track: Series[int] = self._dataset.groupby(
level=LEVEL_TRACK_ID
)[track.FRAME].size()
filtered_ids = detection_counts_per_track[
detection_counts_per_track >= length
].index
Expand Down Expand Up @@ -584,9 +594,7 @@ def cut_with_section(
intersection_points.keys()
)

def _create_cut_track_id(
self, row: DataFrame, cut_info: dict[str, list[int]]
) -> str:
def _create_cut_track_id(self, row: Series, cut_info: dict[str, list[int]]) -> str:
if (track_id := row[track.TRACK_ID]) in cut_info.keys():
cut_segment_index = bisect(cut_info[track_id], row["cumcount"])
return f"{track_id}_{cut_segment_index}"
Expand Down Expand Up @@ -670,18 +678,19 @@ def _get_dataset_with_classes(self, classes: list[str]) -> PandasTrackDataset:
dataset = self._other.get_data()
mask = dataset[track.TRACK_CLASSIFICATION].isin(classes)
filtered_df = dataset[mask]
tracks_to_keep = filtered_df.index.get_level_values(LEVEL_TRACK_ID).unique()
tracks_to_remove = tracks_to_keep.symmetric_difference(
self._other.get_track_ids_as_string()
)
updated_geometry_datasets = self._other._remove_from_geometry_dataset(
tracks_to_remove
)
# The pandas Index does not implement the Sequence interface, which causes
# compatibility issues with the PandasTrackDataset._remove_from_geometry method
# when trying to remove geometries for tracks that have been deleted.
# To address this, we invalidate the entire geometry cache rather than
# attempting selective removal.
# This approach is acceptable because track removal only occurs when
# cutting tracks, which is a rare use case.

return PandasTrackDataset(
self._other.track_geometry_factory,
filtered_df,
updated_geometry_datasets,
self._other.calculator,
track_geometry_factory=self._other.track_geometry_factory,
dataset=filtered_df,
geometry_datasets=None,
calculator=self._other.calculator,
)

def add_all(self, other: Iterable[Track]) -> TrackDataset:
Expand Down
7 changes: 4 additions & 3 deletions OTAnalytics/plugin_filter/dataframe_filter.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from abc import ABC
from datetime import datetime
from typing import Iterable, Optional

from pandas import DataFrame, Series
from pandas import DataFrame

from OTAnalytics.application.plotting import GetCurrentFrame
from OTAnalytics.application.use_cases.video_repository import GetVideos
Expand Down Expand Up @@ -35,7 +36,7 @@ def conjunct_with(
return DataFrameConjunction(self, other)


class DataFramePredicate(Predicate[DataFrame, DataFrame]):
class DataFramePredicate(Predicate[DataFrame, DataFrame], ABC):
"""Checks DataFrame entries against predicate.
Entries that do not fulfill predicate are filtered out.
Expand Down Expand Up @@ -274,7 +275,7 @@ def _reset(self) -> None:
self._result = None

def _extend_complex_predicate(
self, predicate: Predicate[DataFrame, Series]
self, predicate: Predicate[DataFrame, DataFrame]
) -> None:
if self._complex_predicate:
self._complex_predicate = self._complex_predicate.conjunct_with(predicate)
Expand Down
Loading

0 comments on commit 7eb5958

Please sign in to comment.