Skip to content

Commit

Permalink
rerun_py.dataframe: Add APIs for using_index_values, `fill_latest_a…
Browse files Browse the repository at this point in the history
…t`, and `filter_is_not_null` (#7680)

### What
- Resolves: #7455
- DNM: requires #7683 

### TODO
- [x] The unit test currently fails with what looks like a bug

### Checklist
* [x] I have read and agree to [Contributor
Guide](https://github.com/rerun-io/rerun/blob/main/CONTRIBUTING.md) and
the [Code of
Conduct](https://github.com/rerun-io/rerun/blob/main/CODE_OF_CONDUCT.md)
* [x] I've included a screenshot or gif (if applicable)
* [x] I have tested the web demo (if applicable):
* Using examples from latest `main` build:
[rerun.io/viewer](https://rerun.io/viewer/pr/7680?manifest_url=https://app.rerun.io/version/main/examples_manifest.json)
* Using full set of examples from `nightly` build:
[rerun.io/viewer](https://rerun.io/viewer/pr/7680?manifest_url=https://app.rerun.io/version/nightly/examples_manifest.json)
* [x] The PR title and labels are set such as to maximize their
usefulness for the next release's CHANGELOG
* [x] If applicable, add a new check to the [release
checklist](https://github.com/rerun-io/rerun/blob/main/tests/python/release_checklist)!
* [x] I have noted any breaking changes to the log API in
`CHANGELOG.md` and the migration guide

- [PR Build Summary](https://build.rerun.io/pr/7680)
- [Recent benchmark results](https://build.rerun.io/graphs/crates.html)
- [Wasm size tracking](https://build.rerun.io/graphs/sizes.html)

To run all checks from `main`, comment on the PR with `@rerun-bot
full-check`.

---------

Co-authored-by: Clement Rey <cr.rey.clement@gmail.com>
  • Loading branch information
jleibs and teh-cmc authored Oct 11, 2024
1 parent 9f23ae0 commit b69be17
Show file tree
Hide file tree
Showing 3 changed files with 118 additions and 0 deletions.
30 changes: 30 additions & 0 deletions rerun_py/rerun_bindings/rerun_bindings.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,42 @@ class RecordingView:
"""
Filter the view to only include data at the given index values.
The index values returned will be the intersection between the provided values and the
original index values.
This requires index values to be a precise match. Index values in Rerun are
represented as i64 sequence counts or nanoseconds. This API does not expose an interface
in floating point seconds, as the numerical conversion would risk false mismatches.
"""
...

def filter_is_not_null(self, column: AnyColumn) -> RecordingView:
    # NOTE(review): the Rust binding in `rerun_py/src/dataframe.rs` declares this parameter as
    # `AnyComponentColumn`, which suggests index columns are not accepted here -- confirm whether
    # this annotation should be narrowed to match.
    """
    Filter the view to only include rows where the given column is not null.

    This corresponds to rows for index values where this component was provided to Rerun explicitly
    via `.log()` or `.send_columns()`.

    A new view is returned; the view this method is called on is not modified. The binding stores
    a single filter column on the query, so a later call replaces the column chosen by an earlier one.
    """
    ...

def using_index_values(self, values: IndexValuesLike) -> RecordingView:
    """
    Replace the index in the view with the provided values.

    The output view will always have the same number of rows as the provided values, even if
    those rows are empty. Use with `.fill_latest_at()` to populate these rows with the most
    recent data.

    This requires index values to be a precise match. Index values in Rerun are
    represented as i64 sequence counts or nanoseconds. This API does not expose an interface
    in floating point seconds, as the numerical conversion would risk false mismatches.

    A new view is returned; the view this method is called on is not modified. If `values`
    cannot be converted to index values, the conversion error is raised to the caller.
    """
    ...

def fill_latest_at(self) -> RecordingView:
    """
    Populate any null values in a row with the latest valid data on the timeline.

    A new view is returned; the view this method is called on is not modified. Typically
    combined with `.using_index_values()`, whose output rows may otherwise be empty.
    Cells can still be null after filling (the unit test expects the row at index 0 to stay
    null) -- presumably because no earlier data exists for them.
    """
    ...

# Run the view's query and return the result as a `pa.RecordBatchReader`.
# Columns can be given positionally (`*args`) or through the `columns` keyword; the unit tests
# call `select()` with no arguments and get every column of the view back.
def select(self, *args: AnyColumn, columns: Optional[Sequence[AnyColumn]] = None) -> pa.RecordBatchReader: ...

class Recording:
Expand Down
34 changes: 34 additions & 0 deletions rerun_py/src/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -553,6 +553,40 @@ impl PyRecordingView {
query_expression,
})
}

// Builds a new view whose query keeps only rows where `column` is not null.
//
// `self` is left untouched: the query expression is cloned, its
// `filtered_point_of_view` is set to the selector derived from `column`
// (replacing whatever was stored there), and the clone is paired with a clone
// of the recording handle.
fn filter_is_not_null(&self, column: AnyComponentColumn) -> Self {
    let selector = column.into_selector();

    let query_expression = {
        let mut expr = self.query_expression.clone();
        expr.filtered_point_of_view = Some(selector);
        expr
    };

    Self {
        recording: self.recording.clone(),
        query_expression,
    }
}

// Builds a new view whose query uses exactly the supplied index values.
//
// The conversion of `values` happens first, so a conversion failure is
// propagated to the caller before anything is cloned. On success the cloned
// query expression gets `using_index_values` set to the converted values;
// `self` is not modified.
fn using_index_values(&self, values: IndexValuesLike<'_>) -> PyResult<Self> {
    let index_values = values.to_index_values()?;

    let query_expression = {
        let mut expr = self.query_expression.clone();
        expr.using_index_values = Some(index_values);
        expr
    };

    let view = Self {
        recording: self.recording.clone(),
        query_expression,
    };
    Ok(view)
}

// Builds a new view whose query fills sparse cells using the
// `SparseFillStrategy::LatestAtGlobal` strategy.
//
// Only the cloned query expression is changed; `self` keeps its current
// fill strategy.
fn fill_latest_at(&self) -> Self {
    let query_expression = {
        let mut expr = self.query_expression.clone();
        expr.sparse_fill_strategy = SparseFillStrategy::LatestAtGlobal;
        expr
    };

    Self {
        recording: self.recording.clone(),
        query_expression,
    }
}
}

impl PyRecording {
Expand Down
54 changes: 54 additions & 0 deletions rerun_py/tests/unit/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,11 @@ def setup_method(self) -> None:
type=rr.components.Position3D.arrow_type(),
)

self.empty_pos = pa.array(
[],
type=rr.components.Position3D.arrow_type(),
)

def test_recording_info(self) -> None:
assert self.recording.application_id() == APP_ID
assert self.recording.recording_id() == str(RECORDING_ID)
Expand Down Expand Up @@ -188,6 +193,55 @@ def test_index_values(self) -> None:
with pytest.raises(TypeError):
view.filter_index_values(pa.array([1.0, 2.0], type=pa.float64()))

def test_using_index_values(self) -> None:
    """Check `using_index_values` on its own, then combined with `fill_latest_at`."""
    requested = [0, 5, 9]
    points_column = "/points:Position3D"

    sparse_view = self.recording.view(index="my_index", contents="points").using_index_values(requested)
    expected_index = pa.chunked_array([pa.array(requested, type=pa.int64())])

    # Without a fill strategy every requested index value yields a row, but the
    # points cell of each row is expected to be null.
    sparse = sparse_view.select().read_all().combine_chunks()

    # my_index, log_time, log_tick, points, colors
    assert sparse.num_columns == 5
    assert sparse.num_rows == 3
    assert sparse.column("my_index").equals(expected_index)
    for row in range(3):
        assert not sparse.column(points_column)[row].is_valid

    # With latest-at filling the shape is unchanged; the first row is still
    # expected to be null while the later rows pick up the fixture positions
    # (`expected_pos0` / `expected_pos1` come from `setup_method`).
    filled = sparse_view.fill_latest_at().select().read_all().combine_chunks()

    assert filled.num_columns == 5
    assert filled.num_rows == 3
    assert filled.column("my_index").equals(expected_index)

    positions = filled.column(points_column)
    assert not positions[0].is_valid
    assert positions[1].values.equals(self.expected_pos0)
    assert positions[2].values.equals(self.expected_pos1)

def test_filter_is_not_null(self) -> None:
    """Filtering on the color column should leave exactly one row."""
    color_column = rr.dataframe.ComponentColumnSelector("points", rr.components.Color)

    filtered_view = self.recording.view(index="my_index", contents="points").filter_is_not_null(color_column)
    result = filtered_view.select().read_all()

    # my_index, log_time, log_tick, points, colors
    assert result.num_columns == 5
    assert result.num_rows == 1

    # The surviving row is expected to match the second fixture entry
    # (`expected_index1` / `expected_pos1` come from `setup_method`).
    first_index = result.column("my_index")[0]
    assert first_index.equals(self.expected_index1[0])

    first_position = result.column("/points:Position3D")[0]
    assert first_position.values.equals(self.expected_pos1)

def test_view_syntax(self) -> None:
good_content_expressions = [
{"points": rr.components.Position3D},
Expand Down

0 comments on commit b69be17

Please sign in to comment.