apacheGH-38928: [R] Fix spelling (apache#38929)
### Rationale for this change

### What changes are included in this PR?

Spelling fixes to r/

### Are these changes tested?

### Are there any user-facing changes?

* Closes: apache#38928

Authored-by: Josh Soref <2119212+jsoref@users.noreply.github.com>
Signed-off-by: Jacob Wujciak-Jens <jacob@wujciak.de>
jsoref authored Dec 7, 2023
1 parent 1b634e7 commit f2fb8ff
Showing 51 changed files with 84 additions and 84 deletions.
10 changes: 5 additions & 5 deletions r/NEWS.md
@@ -80,10 +80,10 @@

## Installation

* MacOS builds now use the same installation pathway as on Linux (@assignUser,
* macOS builds now use the same installation pathway as on Linux (@assignUser,
#37684).
* A warning message is now issued on package load when running under emulation
on MacOS (i.e., use of x86 installation of R on M1/aarch64; #37777).
on macOS (i.e., use of x86 installation of R on M1/aarch64; #37777).
* R scripts that run during configuration and installation are now run
using the correct R interpreter (@meztez, #37225).
* Failed libarrow builds now return more detailed output (@amoeba, #37727).
@@ -416,7 +416,7 @@ As of version 10.0.0, `arrow` requires C++17 to build. This means that:

* The `arrow.dev_repo` for nightly builds of the R package and prebuilt
libarrow binaries is now <https://nightlies.apache.org/arrow/r/>.
* Brotli and BZ2 are shipped with MacOS binaries. BZ2 is shipped with Windows binaries. (#13484)
* Brotli and BZ2 are shipped with macOS binaries. BZ2 is shipped with Windows binaries. (#13484)

# arrow 8.0.0

@@ -549,7 +549,7 @@ Arrow arrays and tables can be easily concatenated:
## Other improvements and fixes

* Many of the vignettes have been reorganized, restructured and expanded to improve their usefulness and clarity.
* Code to generate schemas (and individual data type specficiations) are accessible with the `$code()` method on a `schema` or `type`. This allows you to easily get the code needed to create a schema from an object that already has one.
* Code to generate schemas (and individual data type specifications) are accessible with the `$code()` method on a `schema` or `type`. This allows you to easily get the code needed to create a schema from an object that already has one.
* Arrow `Duration` type has been mapped to R's `difftime` class.
* The `decimal256()` type is supported. The `decimal()` function has been revised to call either `decimal256()` or `decimal128()` based on the value of the `precision` argument.
* `write_parquet()` uses a reasonable guess at `chunk_size` instead of always writing a single chunk. This improves the speed of reading and writing large Parquet files.
@@ -824,7 +824,7 @@ to send and receive data. See `vignette("flight", package = "arrow")` for an ove

* `arrow` now depends on [`cpp11`](https://cpp11.r-lib.org/), which brings more robust UTF-8 handling and faster compilation
* The Linux build script now succeeds on older versions of R
* MacOS binary packages now ship with zstandard compression enabled
* macOS binary packages now ship with zstandard compression enabled

## Bug fixes and other enhancements

2 changes: 1 addition & 1 deletion r/R/arrow-object.R
@@ -56,7 +56,7 @@ ArrowObject <- R6Class("ArrowObject",
# Return NULL, because keeping this R6 object in scope is not a good idea.
# This syntax would allow the rare use that has to actually do this to
# do `object <- object$.unsafe_delete()` and reduce the chance that an
# IDE like RStudio will try try to call other methods which will error
# IDE like RStudio will try to call other methods which will error
invisible(NULL)
}
)
2 changes: 1 addition & 1 deletion r/R/arrow-package.R
@@ -183,7 +183,7 @@ configure_tzdb <- function() {
# Just to be extra safe, let's wrap this in a try();
# we don't want a failed startup message to prevent the package from loading
try({
# On MacOS only, Check if we are running in under emulation, and warn this will not work
# On macOS only, Check if we are running in under emulation, and warn this will not work
if (on_rosetta()) {
packageStartupMessage(
paste(
2 changes: 1 addition & 1 deletion r/R/compression.R
@@ -61,7 +61,7 @@ Codec$create <- function(type = "gzip", compression_level = NA) {
#' the Arrow C++ library. This function lets you know which are available for
#' use.
#' @param type A string, one of "uncompressed", "snappy", "gzip", "brotli",
#' "zstd", "lz4", "lzo", or "bz2", case insensitive.
#' "zstd", "lz4", "lzo", or "bz2", case-insensitive.
#' @return Logical: is `type` available?
#' @export
#' @examples
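
A minimal sketch of the check documented above (codec names are illustrative; availability depends on how libarrow was built):

```r
library(arrow)

# Returns TRUE only if this libarrow build includes the codec.
codec_is_available("zstd")
# Matching is case-insensitive, so this is equivalent:
codec_is_available("ZSTD")
```
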
2 changes: 1 addition & 1 deletion r/R/config.R
@@ -40,7 +40,7 @@ io_thread_count <- function() {

#' @rdname io_thread_count
#' @param num_threads integer: New number of threads for thread pool. At least
#' two threads are reccomended to support all operations in the arrow
#' two threads are recommended to support all operations in the arrow
#' package.
#' @export
set_io_thread_count <- function(num_threads) {
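
A minimal sketch of adjusting the pool described above (the thread count shown is illustrative):

```r
library(arrow)

io_thread_count()       # current size of the I/O thread pool
set_io_thread_count(4)  # at least two threads are recommended
```
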
2 changes: 1 addition & 1 deletion r/R/csv.R
@@ -76,7 +76,7 @@
#'
#' Note that if you are specifying column names, whether by `schema` or
#' `col_names`, and the CSV file has a header row that would otherwise be used
#' to idenfity column names, you'll need to add `skip = 1` to skip that row.
#' to identify column names, you'll need to add `skip = 1` to skip that row.
#'
#' @param file A character file name or URI, literal data (either a single string or a [raw] vector),
#' an Arrow input stream, or a `FileSystem` with path (`SubTreeFileSystem`).
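
A minimal sketch of the behavior described above (the file name and column names are hypothetical):

```r
library(arrow)

# The file is assumed to have its own header row; because col_names
# overrides it, skip = 1 keeps that header from being read as data.
tbl <- read_csv_arrow("data.csv", col_names = c("x", "y", "z"), skip = 1)
```
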
2 changes: 1 addition & 1 deletion r/R/dataset.R
@@ -46,7 +46,7 @@
#'
#' The default behavior in `open_dataset()` is to inspect the file paths
#' contained in the provided directory, and if they look like Hive-style, parse
#' them as Hive. If your dataset has Hive-style partioning in the file paths,
#' them as Hive. If your dataset has Hive-style partitioning in the file paths,
#' you do not need to provide anything in the `partitioning` argument to
#' `open_dataset()` to use them. If you do provide a character vector of
#' partition column names, they will be ignored if they match what is detected,
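
A minimal sketch of the Hive-style case described above (the directory layout is hypothetical):

```r
library(arrow)
library(dplyr)

# Assume paths like sales/year=2023/month=01/part-0.parquet.
# The year/month keys are parsed from the paths automatically, so no
# `partitioning` argument is needed.
ds <- open_dataset("sales")
ds %>%
  filter(year == 2023) %>%
  collect()
```
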
2 changes: 1 addition & 1 deletion r/R/dplyr-count.R
@@ -56,7 +56,7 @@ tally.arrow_dplyr_query <- function(x, wt = NULL, sort = FALSE, name = NULL) {

tally.Dataset <- tally.ArrowTabular <- tally.RecordBatchReader <- tally.arrow_dplyr_query

# we don't want to depend on dplyr, but we refrence these above
# we don't want to depend on dplyr, but we reference these above
utils::globalVariables(c("n", "desc"))

check_n_name <- function(name,
10 changes: 5 additions & 5 deletions r/R/dplyr-filter.R
@@ -28,20 +28,20 @@ filter.arrow_dplyr_query <- function(.data, ..., .by = NULL, .preserve = FALSE)
out$group_by_vars <- by$names
}

filts <- expand_across(out, quos(...))
if (length(filts) == 0) {
expanded_filters <- expand_across(out, quos(...))
if (length(expanded_filters) == 0) {
# Nothing to do
return(as_adq(.data))
}

# tidy-eval the filter expressions inside an Arrow data_mask
filters <- lapply(filts, arrow_eval, arrow_mask(out))
filters <- lapply(expanded_filters, arrow_eval, arrow_mask(out))
bad_filters <- map_lgl(filters, ~ inherits(., "try-error"))
if (any(bad_filters)) {
# This is similar to abandon_ship() except that the filter eval is
# vectorized, and we apply filters that _did_ work before abandoning ship
# with the rest
expr_labs <- map_chr(filts[bad_filters], format_expr)
expr_labs <- map_chr(expanded_filters[bad_filters], format_expr)
if (query_on_dataset(out)) {
# Abort. We don't want to auto-collect if this is a Dataset because that
# could blow up, too big.
@@ -71,7 +71,7 @@ filter.arrow_dplyr_query <- function(.data, ..., .by = NULL, .preserve = FALSE)
if (by$from_by) {
out <- dplyr::ungroup(out)
}
return(dplyr::filter(out, !!!filts[bad_filters], .by = {{ .by }}))
return(dplyr::filter(out, !!!expanded_filters[bad_filters], .by = {{ .by }}))
}
}

2 changes: 1 addition & 1 deletion r/R/dplyr-funcs-augmented.R
@@ -18,7 +18,7 @@
#' Add the data filename as a column
#'
#' This function only exists inside `arrow` `dplyr` queries, and it only is
#' valid when quering on a `FileSystemDataset`.
#' valid when querying on a `FileSystemDataset`.
#'
#' To use filenames generated by this function in subsequent pipeline steps, you
#' must either call \code{\link[dplyr:compute]{compute()}} or
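
A minimal sketch of the constraint described above (the dataset path and column name are hypothetical):

```r
library(arrow)
library(dplyr)

# add_filename() is only valid when querying a FileSystemDataset, and the
# filenames must be materialized with collect() (or compute()) before they
# can be used in later, non-Arrow steps.
open_dataset("sales") %>%
  mutate(source_file = add_filename()) %>%
  collect()
```
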
2 changes: 1 addition & 1 deletion r/R/dplyr-funcs-conditional.R
@@ -55,7 +55,7 @@ register_bindings_conditional <- function() {
}

if (last_arg && arg$type_id() %in% TYPES_WITH_NAN) {
# store the NA_real_ in the same type as arg to avoid avoid casting
# store the NA_real_ in the same type as arg to avoid casting
# smaller float types to larger float types
NA_expr <- Expression$scalar(Scalar$create(NA_real_, type = arg$type()))
Expression$create("if_else", Expression$create("is_nan", arg), NA_expr, arg)
4 changes: 2 additions & 2 deletions r/R/dplyr-funcs-datetime.R
@@ -459,15 +459,15 @@ register_bindings_datetime_timezone <- function() {
roll_dst[1],
"error" = 0L,
"boundary" = 2L,
arrow_not_supported("`roll_dst` value must be 'error' or 'boundary' for non-existent times; other values")
arrow_not_supported("`roll_dst` value must be 'error' or 'boundary' for nonexistent times; other values")
)

ambiguous <- switch(
roll_dst[2],
"error" = 0L,
"pre" = 1L,
"post" = 2L,
arrow_not_supported("`roll_dst` value must be 'error', 'pre', or 'post' for non-existent times")
arrow_not_supported("`roll_dst` value must be 'error', 'pre', or 'post' for nonexistent times")
)

if (identical(tzone, "")) {
2 changes: 1 addition & 1 deletion r/R/dplyr-funcs-string.R
@@ -516,7 +516,7 @@ register_bindings_string_other <- function() {
msg = "`stop` must be length 1 - other lengths are not supported in Arrow"
)

# substr treats values as if they're on a continous number line, so values
# substr treats values as if they're on a continuous number line, so values
# 0 are effectively blank characters - set `start` to 1 here so Arrow mimics
# this behavior
if (start <= 0) {
4 changes: 2 additions & 2 deletions r/R/dplyr-funcs-type.R
@@ -158,8 +158,8 @@ register_bindings_type_cast <- function() {
if (identical(fix.empty.names, TRUE)) {
names(args) <- make.names(names(args), unique = TRUE)
} else {
name_emtpy <- names(args) == ""
names(args)[!name_emtpy] <- make.names(names(args)[!name_emtpy], unique = TRUE)
name_empty <- names(args) == ""
names(args)[!name_empty] <- make.names(names(args)[!name_empty], unique = TRUE)
}
}

2 changes: 1 addition & 1 deletion r/R/duckdb.R
@@ -89,7 +89,7 @@ arrow_duck_connection <- function() {
# but if we don't explicitly run dbDisconnect() the user gets a warning
# that they may not expect (since they did not open a duckdb connection).
# This bit of code will run when the package namespace is cleaned up (i.e.,
# at exit). This is more reliable than .onUnload() or .onDetatch(), which
# at exit). This is more reliable than .onUnload() or .onDetach(), which
# don't necessarily run on exit.
reg.finalizer(arrow_duck_finalizer, function(...) {
con <- getOption("arrow_duck_con")
6 changes: 3 additions & 3 deletions r/R/extension.R
@@ -83,7 +83,7 @@ ExtensionArray$create <- function(x, type) {
#' - `$WrapArray(array)`: Wraps a storage [Array] into an [ExtensionArray]
#' with this extension type.
#'
#' In addition, subclasses may override the following methos to customize
#' In addition, subclasses may override the following methods to customize
#' the behaviour of extension classes.
#'
#' - `$deserialize_instance()`: This method is called when a new [ExtensionType]
@@ -184,7 +184,7 @@ ExtensionType <- R6Class("ExtensionType",
},
ToString = function() {
# metadata is probably valid UTF-8 (e.g., JSON), but might not be
# and it's confusing to error when printing the object. This herustic
# and it's confusing to error when printing the object. This heuristic
# isn't perfect (but subclasses should override this method anyway)
metadata_raw <- self$extension_metadata()

@@ -286,7 +286,7 @@ ExtensionType$create <- function(storage_type,
#' "dot" syntax (i.e., "some_package.some_type"). The namespace "arrow"
#' is reserved for extension types defined by the Apache Arrow libraries.
#' @param extension_metadata A [raw()] or [character()] vector containing the
#' serialized version of the type. Chatacter vectors must be length 1 and
#' serialized version of the type. Character vectors must be length 1 and
#' are converted to UTF-8 before converting to [raw()].
#' @param type_class An [R6::R6Class] whose `$new()` class method will be
#' used to construct a new instance of the type.
4 changes: 2 additions & 2 deletions r/R/feather.R
@@ -24,7 +24,7 @@
#' a legacy version available starting in 2016, and the Version 2 (V2),
#' which is the Apache Arrow IPC file format.
#' The default version is V2.
#' V1 files are distinct from Arrow IPC files and lack many feathures,
#' V1 files are distinct from Arrow IPC files and lack many features,
#' such as the ability to store all Arrow data tyeps, and compression support.
#' [write_ipc_file()] can only write V2 files.
#'
@@ -91,7 +91,7 @@ write_feather <- function(x,
}
}
if (is.null(compression_level)) {
# Use -1 as sentinal for "default"
# Use -1 as sentinel for "default"
compression_level <- -1L
}
compression_level <- as.integer(compression_level)
2 changes: 1 addition & 1 deletion r/R/filesystem.R
@@ -156,7 +156,7 @@ FileSelector$create <- function(base_dir, allow_not_found = FALSE, recursive = F
#' buckets if `$CreateDir()` is called on the bucket level (default `FALSE`).
#' - `allow_bucket_deletion`: logical, if TRUE, the filesystem will delete
#' buckets if`$DeleteDir()` is called on the bucket level (default `FALSE`).
#' - `request_timeout`: Socket read time on Windows and MacOS in seconds. If
#' - `request_timeout`: Socket read time on Windows and macOS in seconds. If
#' negative, the AWS SDK default (typically 3 seconds).
#' - `connect_timeout`: Socket connection timeout in seconds. If negative, AWS
#' SDK default is used (typically 1 second).
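
A minimal sketch using the timeout options documented above (the region value is a placeholder):

```r
library(arrow)

# Timeouts are in seconds; negative values fall back to the AWS SDK defaults.
fs <- S3FileSystem$create(
  region = "us-east-1",
  request_timeout = 10,
  connect_timeout = 5
)
```
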
2 changes: 1 addition & 1 deletion r/R/parquet.R
@@ -128,7 +128,7 @@ read_parquet <- function(file,
#' - A named vector, to specify the value for the named columns, the default
#' value for the setting is used when not supplied
#'
#' The `compression` argument can be any of the following (case insensitive):
#' The `compression` argument can be any of the following (case-insensitive):
#' "uncompressed", "snappy", "gzip", "brotli", "zstd", "lz4", "lzo" or "bz2".
#' Only "uncompressed" is guaranteed to be available, but "snappy" and "gzip"
#' are almost always included. See [codec_is_available()].
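
A minimal sketch of the compression option described above (the output path is illustrative):

```r
library(arrow)

# Compression names are matched case-insensitively; only "uncompressed" is
# guaranteed, so check codec_is_available("zstd") first if unsure.
write_parquet(mtcars, "mtcars.parquet", compression = "zstd")
```
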
2 changes: 1 addition & 1 deletion r/R/udf.R
@@ -154,7 +154,7 @@ arrow_scalar_function <- function(fun, in_type, out_type, auto_convert = FALSE)
sprintf(
paste0(
"Expected `fun` to accept %d argument(s)\n",
"but found a function that acccepts %d argument(s)\n",
"but found a function that accepts %d argument(s)\n",
"Did you forget to include `context` as the first argument?"
),
expected_n_args,
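
A minimal sketch of a UDF that satisfies the arity check referenced above (the function name and logic are illustrative):

```r
library(arrow)
library(dplyr)

# The first argument must be `context`; the remaining arguments are the
# inputs counted against the expected number of arguments.
register_scalar_function(
  "times_two",
  function(context, x) x * 2,
  in_type = float64(),
  out_type = float64(),
  auto_convert = TRUE
)

record_batch(a = c(1, 2, 3)) %>%
  mutate(b = times_two(a)) %>%
  collect()
```
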
2 changes: 1 addition & 1 deletion r/configure
@@ -62,7 +62,7 @@ PKG_CONFIG_NAME="arrow"
PKG_BREW_NAME="apache-arrow"
PKG_TEST_HEADER="<arrow/api.h>"

# Some env vars that control the build (all logical, case insensitive)
# Some env vars that control the build (all logical, case-insensitive)
# Development mode, also increases verbosity in the bundled build
ARROW_R_DEV=`echo $ARROW_R_DEV | tr '[:upper:]' '[:lower:]'`
# The bundled build compiles arrow C++ from source; FORCE ensures we don't pick up
2 changes: 1 addition & 1 deletion r/man/ExtensionType.Rd

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion r/man/FileSystem.Rd


2 changes: 1 addition & 1 deletion r/man/add_filename.Rd


2 changes: 1 addition & 1 deletion r/man/codec_is_available.Rd


2 changes: 1 addition & 1 deletion r/man/io_thread_count.Rd


2 changes: 1 addition & 1 deletion r/man/new_extension_type.Rd


2 changes: 1 addition & 1 deletion r/man/open_dataset.Rd


2 changes: 1 addition & 1 deletion r/man/read_delim_arrow.Rd


2 changes: 1 addition & 1 deletion r/man/write_feather.Rd


2 changes: 1 addition & 1 deletion r/man/write_parquet.Rd


2 changes: 1 addition & 1 deletion r/src/altrep.cpp
@@ -747,7 +747,7 @@ struct AltrepVectorString : public AltrepVectorBase<AltrepVectorString<Type>> {
// Helper class to convert to R strings. We declare one of these for the
// class to avoid having to stack-allocate one for every STRING_ELT call.
// This class does not own a reference to any arrays: it is the caller's
// responsibility to ensure the Array lifetime exeeds that of the viewer.
// responsibility to ensure the Array lifetime exceeds that of the viewer.
struct RStringViewer {
RStringViewer() : strip_out_nuls_(false), nul_was_stripped_(false) {}
