Skip to content

Commit

Permalink
Add support for haven_labelled in the data explorer (#634)
Browse files Browse the repository at this point in the history
* Add support for `haven_labelled` in the data explorer

* Add haven to CI
  • Loading branch information
dfalbel authored Nov 27, 2024
1 parent 7af2c44 commit 316f723
Show file tree
Hide file tree
Showing 6 changed files with 82 additions and 0 deletions.
1 change: 1 addition & 0 deletions .github/workflows/test-linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ jobs:
data.table
rstudioapi
tibble
haven

- name: Setup SSH access
uses: mxschmitt/action-tmate@v3
Expand Down
26 changes: 26 additions & 0 deletions crates/ark/src/data_explorer/histogram.rs
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ mod tests {
use stdext::assert_match;

use super::*;
use crate::fixtures::package_is_installed;
use crate::r_task;

fn default_options() -> FormatOptions {
Expand Down Expand Up @@ -606,4 +607,29 @@ mod tests {
);
})
}

/// Checks the frequency table produced for `haven_labelled` vectors: the
/// expected values are the label names (not the underlying codes), paired
/// with the listed counts.
#[test]
fn test_frequency_table_haven_labelled() {
    r_task(|| {
        // Nothing to verify when the `haven` package is not available.
        if package_is_installed("haven") {
            // Every label occurs in the data.
            let all_labels_present = harp::parse_eval_global("c('B', 'C', 'A')").unwrap();
            test_frequency_table(
                "haven::labelled(c(rep(1, 100), rep(2, 200), rep(3, 150)), labels = c('A' = 1, 'B' = 2, 'C' = 3))",
                10,
                all_labels_present,
                vec![200, 150, 100],
                None,
            );

            // Label 'C' never occurs in the data, yet it is still expected in
            // the table, with a count of 0.
            let with_unused_label = harp::parse_eval_global("c('B', 'A', 'C')").unwrap();
            test_frequency_table(
                "haven::labelled(c(rep(1, 100), rep(2, 200)), labels = c('A' = 1, 'B' = 2, 'C' = 3))",
                10,
                with_unused_label,
                vec![200, 100, 0],
                None,
            );
        }
    })
}
}
29 changes: 29 additions & 0 deletions crates/ark/src/data_explorer/summary_stats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ where
#[cfg(test)]
mod tests {
use super::*;
use crate::fixtures::package_is_installed;

fn default_options() -> FormatOptions {
FormatOptions {
Expand Down Expand Up @@ -327,4 +328,32 @@ mod tests {
assert_eq!(stats.date_stats, Some(expected));
})
}

/// Summary statistics of a `haven_labelled` column must equal those of a
/// plain factor built from the same values and labels.
#[test]
fn test_haven_labelled() {
    crate::r_task(|| {
        // Nothing to verify when the `haven` package is not available.
        if package_is_installed("haven") {
            let options = default_options();

            // Build both R objects up front: the labelled vector under test
            // and the reference factor it is compared against.
            let labelled =
                harp::parse_eval_base("haven::labelled(c(1, 1, 2), c(Male = 1, Female = 2))")
                    .unwrap();
            let reference =
                harp::parse_eval_base("factor(c(1,1,2), labels = c('Male', 'Female'))").unwrap();

            let labelled_stats =
                summary_stats(labelled.sexp, ColumnDisplayType::String, &options).unwrap();
            let reference_stats =
                summary_stats(reference.sexp, ColumnDisplayType::String, &options).unwrap();

            assert_eq!(labelled_stats, reference_stats);
        }
    })
}
}
7 changes: 7 additions & 0 deletions crates/ark/src/data_explorer/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,13 @@ pub fn display_type(x: SEXP) -> ColumnDisplayType {
}

if r_is_object(x) {
// `haven_labelled` objects inherit from their internal data type
// such as integer or character. We special case them here before
// checking the internal types below.
if r_inherits(x, "haven_labelled") {
return ColumnDisplayType::String;
}

if r_inherits(x, "logical") {
return ColumnDisplayType::Boolean;
}
Expand Down
11 changes: 11 additions & 0 deletions crates/ark/src/fixtures/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ use serde::Serialize;
use tree_sitter::Point;

use crate::modules;
use crate::modules::ARK_ENVS;

// Lock for tests that can't be run concurrently. Only needed for tests that can't
// be wrapped in an `r_task()`.
Expand Down Expand Up @@ -92,6 +93,16 @@ where
}
}

/// Reports whether the R package named `package` is installed.
///
/// Evaluates `.ps.is_installed('<package>')` in the `ARK_ENVS.positron_ns`
/// environment and converts the result to a `bool`.
///
/// `package` is spliced verbatim between single quotes in the generated R
/// code, so it must not itself contain a single quote.
///
/// # Panics
///
/// Panics if the R expression fails to evaluate, or if its result cannot be
/// converted to a `bool`. The panic message includes the evaluated code so a
/// failing test shows which package check went wrong.
pub fn package_is_installed(package: &str) -> bool {
    let code = format!(".ps.is_installed('{package}')");

    harp::parse_eval0(code.as_str(), ARK_ENVS.positron_ns)
        .unwrap_or_else(|err| panic!("failed to evaluate `{code}`: {err:?}"))
        .try_into()
        .unwrap_or_else(|err| panic!("result of `{code}` is not convertible to `bool`: {err:?}"))
}

#[cfg(test)]
mod tests {
use tree_sitter::Point;
Expand Down
8 changes: 8 additions & 0 deletions crates/ark/src/modules/positron/r_data_explorer.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@ summary_stats_number <- function(col) {
}

summary_stats_string <- function(col) {
if (inherits(col, 'haven_labelled')) {
col <- haven::as_factor(col)
}

if(is.factor(col)) {
# We could have an optimization here to get unique and empty values
# from levels, but probably not worth it.
Expand Down Expand Up @@ -463,6 +467,10 @@ profile_frequency_table <- function(x, limit) {
))
}

if (inherits(x, "haven_labelled")) {
x <- haven::as_factor(x)
}

if (is.factor(x)) {
values <- levels(x)
counts <- table(x)
Expand Down

0 comments on commit 316f723

Please sign in to comment.