Skip to content

Commit

Permalink
fix: Explicit stringsAsFactors = FALSE for R <= 3.6 (#135)
Browse files Browse the repository at this point in the history
Closes #131.
  • Loading branch information
paleolimbot authored Mar 1, 2023
1 parent b696163 commit 9447c63
Show file tree
Hide file tree
Showing 12 changed files with 87 additions and 78 deletions.
10 changes: 7 additions & 3 deletions .github/workflows/r-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,11 @@ name: test-r

on:
push:
branches: main
branches:
- main
pull_request:
branches: main
branches:
- main
paths:
- '.github/workflows/r-check.yaml'
- 'src/nanoarrow/**'
Expand All @@ -38,6 +40,7 @@ jobs:
matrix:
config:
- {os: macOS-latest, r: 'release'}
- {os: windows-latest, r: '3.6'}
- {os: windows-latest, r: 'release'}
- {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'}
- {os: ubuntu-latest, r: 'release'}
Expand Down Expand Up @@ -70,13 +73,14 @@ jobs:

- uses: r-lib/actions/setup-r-dependencies@v2
with:
extra-packages: any::rcmdcheck
extra-packages: any::rcmdcheck, arrow=?ignore-before-r=4.0.0
needs: check
working-directory: r

- uses: r-lib/actions/check-r-package@v2
env:
ARROW_R_VERBOSE_TEST: "true"
_R_CHECK_FORCE_SUGGESTS_: false
with:
upload-snapshots: true
working-directory: r
Expand Down
2 changes: 1 addition & 1 deletion r/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ RoxygenNote: 7.2.3
URL: https://github.com/apache/arrow-nanoarrow
BugReports: https://github.com/apache/arrow-nanoarrow/issues
Suggests:
arrow (>= 10.0.0),
arrow (>= 9.0.0),
blob,
hms,
rlang,
Expand Down
2 changes: 1 addition & 1 deletion r/tests/testthat/test-altrep.R
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ test_that("nanoarrow_altrep_chr_force_materialize() forces materialization", {
expect_identical(nanoarrow_altrep_force_materialize(x_altrep), 1L)

x <- as_nanoarrow_array(letters, schema = na_string())
x_altrep_df <- data.frame(x = nanoarrow_altrep_chr(x))
x_altrep_df <- data.frame(x = nanoarrow_altrep_chr(x), stringsAsFactors = FALSE)
expect_identical(
nanoarrow_altrep_force_materialize(x_altrep_df, recursive = FALSE),
0L
Expand Down
4 changes: 2 additions & 2 deletions r/tests/testthat/test-array.R
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ test_that("schemaless array list interface works for non-nested types", {
})

test_that("schemaless array list interface works for nested types", {
array <- as_nanoarrow_array(data.frame(a = 1L, b = "two"))
array <- as_nanoarrow_array(data.frame(a = 1L, b = "two", stringsAsFactors = FALSE))
nanoarrow_array_set_schema(array, NULL)

expect_length(array$children, 2L)
Expand Down Expand Up @@ -203,7 +203,7 @@ test_that("array list interface classes offset buffers for relevant types", {
})

test_that("array list interface works for nested types", {
array <- as_nanoarrow_array(data.frame(a = 1L, b = "two"))
array <- as_nanoarrow_array(data.frame(a = 1L, b = "two", stringsAsFactors = FALSE))

expect_named(array$children, c("a", "b"))
expect_s3_class(array$children[[1]], "nanoarrow_array")
Expand Down
5 changes: 4 additions & 1 deletion r/tests/testthat/test-convert-array-stream.R
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,10 @@ test_that("convert array stream works for nested data.frames", {
})

test_that("convert array stream works for struct-style vectors", {
raw_posixlt <- as.data.frame(unclass(as.POSIXlt("2021-01-01", tz = "America/Halifax")))
raw_posixlt <- as.data.frame(
unclass(as.POSIXlt("2021-01-01", tz = "America/Halifax")),
stringsAsFactors = FALSE
)

stream <- as_nanoarrow_array_stream(raw_posixlt)
expect_identical(
Expand Down
17 changes: 12 additions & 5 deletions r/tests/testthat/test-convert-array.R
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ test_that("convert_array() errors for unsupported array", {
})

test_that("convert to vector works for data.frame", {
df <- data.frame(a = 1L, b = "two", c = 3, d = TRUE)
df <- data.frame(a = 1L, b = "two", c = 3, d = TRUE, stringsAsFactors = FALSE)
array <- as_nanoarrow_array(df)

expect_identical(convert_array(array, NULL), df)
Expand All @@ -80,10 +80,12 @@ test_that("convert to vector works for data.frame", {
})

test_that("convert to vector works for partial_frame", {
array <- as_nanoarrow_array(data.frame(a = 1L, b = "two"))
array <- as_nanoarrow_array(
data.frame(a = 1L, b = "two", stringsAsFactors = FALSE)
)
expect_identical(
convert_array(array, vctrs::partial_frame()),
data.frame(a = 1L, b = "two")
data.frame(a = 1L, b = "two", stringsAsFactors = FALSE)
)
})

Expand All @@ -108,7 +110,9 @@ test_that("convert to vector works for function()", {
})

test_that("convert to vector works for tibble", {
array <- as_nanoarrow_array(data.frame(a = 1L, b = "two"))
array <- as_nanoarrow_array(
data.frame(a = 1L, b = "two", stringsAsFactors = FALSE)
)
expect_identical(
convert_array(array, tibble::tibble(a = integer(), b = character())),
tibble::tibble(a = 1L, b = "two")
Expand All @@ -135,7 +139,10 @@ test_that("convert to vector works for struct-style vectors", {
array <- as_nanoarrow_array(as.POSIXlt("2021-01-01", tz = "America/Halifax"))
expect_identical(
convert_array(array),
as.data.frame(unclass(as.POSIXlt("2021-01-01", tz = "America/Halifax")))
as.data.frame(
unclass(as.POSIXlt("2021-01-01", tz = "America/Halifax")),
stringsAsFactors = FALSE
)
)

array <- as_nanoarrow_array(as.POSIXlt("2021-01-01", tz = "America/Halifax"))
Expand Down
6 changes: 4 additions & 2 deletions r/tests/testthat/test-infer-ptype.R
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,10 @@ test_that("infer_nanoarrow_ptype() works for basic types", {
)

expect_identical(
infer_nanoarrow_ptype(as_nanoarrow_array(data.frame(x = character()))),
data.frame(x = character())
infer_nanoarrow_ptype(
as_nanoarrow_array(data.frame(x = character(), stringsAsFactors = FALSE))
),
data.frame(x = character(), stringsAsFactors = FALSE)
)
})

Expand Down
10 changes: 8 additions & 2 deletions r/tests/testthat/test-util.R
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,13 @@ test_that("new_data_frame() works", {
})

test_that("vector fuzzers work", {
ptype <- data.frame(a = logical(), b = integer(), c = double(), d = character())
ptype <- data.frame(
a = logical(),
b = integer(),
c = double(),
d = character(),
stringsAsFactors = FALSE
)
df_gen <- vec_gen(ptype, n = 123)

expect_identical(nrow(df_gen), 123L)
Expand All @@ -66,7 +72,7 @@ test_that("vector fuzzers work", {
})

test_that("vector shuffler works", {
df <- data.frame(letters = letters)
df <- data.frame(letters = letters, stringsAsFactors = FALSE)
df_shuffled <- vec_shuffle(df)
expect_setequal(df_shuffled$letters, df$letters)

Expand Down
4 changes: 2 additions & 2 deletions src/nanoarrow/array.c
Original file line number Diff line number Diff line change
Expand Up @@ -420,8 +420,8 @@ static ArrowErrorCode ArrowArrayCheckInternalBufferSizes(
if (actual_size < expected_size) {
ArrowErrorSet(
error,
"Expected buffer %d to size >= %ld bytes but found buffer with %ld bytes", i,
(long)expected_size, (long)actual_size);
"Expected buffer %d to size >= %ld bytes but found buffer with %ld bytes",
(int)i, (long)expected_size, (long)actual_size);
return EINVAL;
}
}
Expand Down
4 changes: 2 additions & 2 deletions src/nanoarrow/nanoarrow.h
Original file line number Diff line number Diff line change
Expand Up @@ -201,10 +201,10 @@ const char* ArrowErrorMessage(struct ArrowError* error);
/// @{

/// \brief Return a version string in the form "major.minor.patch"
const char* ArrowNanoarrowVersion();
const char* ArrowNanoarrowVersion(void);

/// \brief Return an integer that can be used to compare versions sequentially
int ArrowNanoarrowVersionInt();
int ArrowNanoarrowVersionInt(void);

/// \brief Initialize a description of buffer arrangements from a storage type
void ArrowLayoutInit(struct ArrowLayout* layout, enum ArrowType storage_type);
Expand Down
97 changes: 42 additions & 55 deletions src/nanoarrow/schema.c
Original file line number Diff line number Diff line change
Expand Up @@ -1211,6 +1211,24 @@ static int64_t ArrowSchemaTypeToStringInternal(struct ArrowSchemaView* schema_vi
}
}

// Helper for bookeeping to emulate sprintf()-like behaviour spread
// among multiple sprintf calls.
static inline void ArrowToStringLogChars(char** out, int64_t n_chars_last,
int64_t* n_remaining, int64_t* n_chars) {
*n_chars += n_chars_last;
*n_remaining -= n_chars_last;

// n_remaining is never less than 0
if (*n_remaining < 0) {
*n_remaining = 0;
}

// Can't do math on a NULL pointer
if (*out != NULL) {
*out += n_chars_last;
}
}

int64_t ArrowSchemaToString(struct ArrowSchema* schema, char* out, int64_t n,
char recursive) {
if (schema == NULL) {
Expand All @@ -1237,90 +1255,59 @@ int64_t ArrowSchemaToString(struct ArrowSchema* schema, char* out, int64_t n,

// Uncommon but not technically impossible that both are true
if (is_extension && is_dictionary) {
n_chars_last = snprintf(out + n_chars, n, "%.*s{dictionary(%s)<",
(int)schema_view.extension_name.size_bytes,
schema_view.extension_name.data,
ArrowTypeString(schema_view.storage_type));
n_chars_last = snprintf(
out, n, "%.*s{dictionary(%s)<", (int)schema_view.extension_name.size_bytes,
schema_view.extension_name.data, ArrowTypeString(schema_view.storage_type));
} else if (is_extension) {
n_chars_last =
snprintf(out + n_chars, n, "%.*s{", (int)schema_view.extension_name.size_bytes,
schema_view.extension_name.data);
n_chars_last = snprintf(out, n, "%.*s{", (int)schema_view.extension_name.size_bytes,
schema_view.extension_name.data);
} else if (is_dictionary) {
n_chars_last = snprintf(out + n_chars, n, "dictionary(%s)<",
ArrowTypeString(schema_view.storage_type));
n_chars_last =
snprintf(out, n, "dictionary(%s)<", ArrowTypeString(schema_view.storage_type));
}

n_chars += n_chars_last;
n -= n_chars_last;
if (n < 0) {
n = 0;
}
ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars);

if (!is_dictionary) {
n_chars_last = ArrowSchemaTypeToStringInternal(&schema_view, out + n_chars, n);
n_chars_last = ArrowSchemaTypeToStringInternal(&schema_view, out, n);
} else {
n_chars_last = ArrowSchemaToString(schema->dictionary, out + n_chars, n, recursive);
n_chars_last = ArrowSchemaToString(schema->dictionary, out, n, recursive);
}

n_chars += n_chars_last;
n -= n_chars_last;
if (n < 0) {
n = 0;
}
ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars);

if (recursive && schema->format[0] == '+') {
n_chars_last = snprintf(out + n_chars, n, "<");
n_chars += n_chars_last;
n -= n_chars_last;
if (n < 0) {
n = 0;
}
n_chars_last = snprintf(out, n, "<");
ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars);

for (int64_t i = 0; i < schema->n_children; i++) {
if (i > 0) {
n_chars_last = snprintf(out + n_chars, n, ", ");
n_chars += n_chars_last;
n -= n_chars_last;
if (n < 0) {
n = 0;
}
n_chars_last = snprintf(out, n, ", ");
ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars);
}

// ArrowSchemaToStringInternal() will validate the child and print the error,
// but we need the name first
if (schema->children[i] != NULL && schema->children[i]->release != NULL &&
schema->children[i]->name != NULL) {
n_chars_last = snprintf(out + n_chars, n, "%s: ", schema->children[i]->name);
n_chars += n_chars_last;
n -= n_chars_last;
if (n < 0) {
n = 0;
}
n_chars_last = snprintf(out, n, "%s: ", schema->children[i]->name);
ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars);
}

n_chars_last =
ArrowSchemaToString(schema->children[i], out + n_chars, n, recursive);
n_chars += n_chars_last;
n -= n_chars_last;
if (n < 0) {
n = 0;
}
n_chars_last = ArrowSchemaToString(schema->children[i], out, n, recursive);
ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars);
}

n_chars_last = snprintf(out + n_chars, n, ">");
n_chars += n_chars_last;
n -= n_chars_last;
if (n < 0) {
n = 0;
}
n_chars_last = snprintf(out, n, ">");
ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars);
}

if (is_extension && is_dictionary) {
n_chars += snprintf(out + n_chars, n, ">}");
n_chars += snprintf(out, n, ">}");
} else if (is_extension) {
n_chars += snprintf(out + n_chars, n, "}");
n_chars += snprintf(out, n, "}");
} else if (is_dictionary) {
n_chars += snprintf(out + n_chars, n, ">");
n_chars += snprintf(out, n, ">");
}

return n_chars;
Expand Down
4 changes: 2 additions & 2 deletions src/nanoarrow/utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@

#include "nanoarrow.h"

const char* ArrowNanoarrowVersion() { return NANOARROW_VERSION; }
const char* ArrowNanoarrowVersion(void) { return NANOARROW_VERSION; }

int ArrowNanoarrowVersionInt() { return NANOARROW_VERSION_INT; }
int ArrowNanoarrowVersionInt(void) { return NANOARROW_VERSION_INT; }

int ArrowErrorSet(struct ArrowError* error, const char* fmt, ...) {
if (error == NULL) {
Expand Down

0 comments on commit 9447c63

Please sign in to comment.