Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: normalize pair distances only if max_rul is set #47

Merged
merged 2 commits into from
Dec 8, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 14 additions & 12 deletions rul_datasets/core.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Basic data modules for experiments involving only a single subset of any RUL
dataset. """

from typing import Dict, List, Optional, Tuple, Any, Callable
from typing import Dict, List, Optional, Tuple, Any, Callable, cast, Union

import numpy as np
import pytorch_lightning as pl
Expand Down Expand Up @@ -411,17 +411,17 @@ def __init__(
elif mode == "labeled":
self._get_pair_func = self._get_labeled_pair_idx

def _get_max_rul(self):
def _get_max_rul(self) -> Optional[int]:
max_ruls = [dm.reader.max_rul for dm in self.dms]
if all(m is None for m in max_ruls):
max_rul = 1e10
max_rul = None
elif any(m is None for m in max_ruls):
raise ValueError(
"PairedRulDataset needs a set max_rul for all or none of the readers "
"but at least one and not all of them has None."
)
else:
max_rul = max(max_ruls)
max_rul = max(cast(List[int], max_ruls))

return max_rul

Expand Down Expand Up @@ -470,7 +470,7 @@ def __next__(self) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tens
else:
raise StopIteration

def _get_pair_idx(self) -> Tuple[int, int, int, int, int]:
def _get_pair_idx(self) -> Tuple[int, int, int, Union[int, float], int]:
chosen_run_idx = self._rng.integers(0, len(self._features))
domain_label = self._run_domain_idx[chosen_run_idx]
chosen_run = self._features[chosen_run_idx]
Expand All @@ -480,7 +480,7 @@ def _get_pair_idx(self) -> Tuple[int, int, int, int, int]:
low=0,
high=run_length - self.min_distance,
)
end_idx = min(run_length, anchor_idx + self._max_rul)
end_idx = min(run_length, anchor_idx + (self._max_rul or 999999))
query_idx = self._rng.integers(
low=anchor_idx + self.min_distance,
high=end_idx,
Expand All @@ -489,7 +489,7 @@ def _get_pair_idx(self) -> Tuple[int, int, int, int, int]:

return chosen_run_idx, anchor_idx, query_idx, distance, domain_label

def _get_pair_idx_piecewise(self) -> Tuple[int, int, int, int, int]:
def _get_pair_idx_piecewise(self) -> Tuple[int, int, int, Union[int, float], int]:
chosen_run_idx = self._rng.integers(0, len(self._features))
domain_label = self._run_domain_idx[chosen_run_idx]
chosen_run = self._features[chosen_run_idx]
Expand All @@ -511,7 +511,7 @@ def _get_pair_idx_piecewise(self) -> Tuple[int, int, int, int, int]:

return chosen_run_idx, anchor_idx, query_idx, distance, domain_label

def _get_labeled_pair_idx(self) -> Tuple[int, int, int, int, int]:
def _get_labeled_pair_idx(self) -> Tuple[int, int, int, Union[int, float], int]:
chosen_run_idx = self._rng.integers(0, len(self._features))
domain_label = self._run_domain_idx[chosen_run_idx]
chosen_run = self._features[chosen_run_idx]
Expand All @@ -527,7 +527,7 @@ def _get_labeled_pair_idx(self) -> Tuple[int, int, int, int, int]:
high=run_length,
)
# The RUL label difference is the negative of the time step difference
distance = int(chosen_labels[anchor_idx] - chosen_labels[query_idx])
distance = (chosen_labels[anchor_idx] - chosen_labels[query_idx]).item()

return chosen_run_idx, anchor_idx, query_idx, distance, domain_label

Expand All @@ -536,13 +536,15 @@ def _build_pair(
run: torch.Tensor,
anchor_idx: int,
query_idx: int,
distance: int,
distance: Union[int, float],
domain_label: int,
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
anchors = run[anchor_idx]
queries = run[query_idx]
domain_tensor = torch.tensor(domain_label, dtype=torch.float)
distances = torch.tensor(distance, dtype=torch.float) / self._max_rul
distances = torch.clamp_max(distances, max=1) # max distance is max_rul
distances = torch.tensor(distance, dtype=torch.float)
if self._max_rul is not None: # normalize only if max_rul is set
distances /= self._max_rul
distances = torch.clamp_max(distances, max=1)

return anchors, queries, distances, domain_tensor