diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index a160064..b844834 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -26,15 +26,13 @@ jobs: - {os: windows-latest, r: 'release'} - # Use older ubuntu to maximise backward compatibility - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} - {os: ubuntu-latest, r: 'release'} - {os: ubuntu-latest, r: 'oldrel-1'} - {os: ubuntu-latest, r: 'oldrel-2'} - # Until dplyr::summarize is swapped for dplyr::reframe -# - {os: ubuntu-latest, r: 'oldrel-3'} # Uncomment this in 2023 (once current R is >= 4.3) - # The package requires R 3.6, I don't really remember why +# - {os: ubuntu-latest, r: 'oldrel-3'} + # Uncomment this in 2024 (once current R is >= 4.4) # - {os: ubuntu-latest, r: 'oldrel-4'} env: @@ -58,11 +56,6 @@ jobs: extra-packages: any::rcmdcheck, CAST=?ignore-before-r=4.1.0 needs: check - - name: Update dependencies - run: | - install.packages("purrr", repos = 'https://cloud.r-project.org') - shell: Rscript {0} - - uses: r-lib/actions/check-r-package@v2 env: _R_CHECK_FORCE_SUGGESTS_: false diff --git a/DESCRIPTION b/DESCRIPTION index 37028fb..88a8ffc 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -35,7 +35,7 @@ URL: https://github.com/mikemahoney218/waywiser, https://mikemahoney218.github.io/waywiser/ BugReports: https://github.com/mikemahoney218/waywiser/issues Depends: - R (>= 3.6) + R (>= 4.0) Imports: dplyr, fields, @@ -58,7 +58,6 @@ Suggests: CAST, covr, ggplot2, - kableExtra, knitr, modeldata, recipes, @@ -74,6 +73,7 @@ Suggests: withr Config/testthat/edition: 3 Config/testthat/parallel: true +Config/Needs/website: kableExtra Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE, roclets = c("namespace", "rd", "srr::srr_stats_roclet")) diff --git a/R/data.R b/R/data.R index 960893f..3f4e896 100644 --- a/R/data.R +++ b/R/data.R @@ -74,7 +74,7 @@ #' #' This data is adapted from the CAST vignette #' 
`vignette("cast02-AOA-tutorial", package = "CAST")`. -#' The original data is derived from the Worldclim global climate variables/ +#' The original data is derived from the Worldclim global climate variables. #' #' @srrstats {G5.1} Data used to test the package is exported. #' diff --git a/R/global_geary.R b/R/global_geary.R index e095d7a..222f670 100644 --- a/R/global_geary.R +++ b/R/global_geary.R @@ -3,6 +3,10 @@ #' Calculate the global Geary's C statistic for model residuals. #' `ww_global_geary_c()` returns the statistic itself, while #' `ww_global_geary_pvalue()` returns the associated p value. +#' These functions are meant to help assess model predictions, for instance by +#' identifying if there are clusters of higher residuals than expected. For +#' statistical testing and inference applications, use +#' [spdep::geary.test()] instead. #' #' These functions can be used for geographic or projected coordinate reference #' systems and expect 2D data. diff --git a/R/global_moran.R b/R/global_moran.R index 7d065e8..361e60c 100644 --- a/R/global_moran.R +++ b/R/global_moran.R @@ -3,6 +3,10 @@ #' Calculate the global Moran's I statistic for model residuals. #' `ww_global_moran_i()` returns the statistic itself, while #' `ww_global_moran_pvalue()` returns the associated p value. +#' These functions are meant to help assess model predictions, for instance by +#' identifying if there are clusters of higher residuals than expected. For +#' statistical testing and inference applications, use +#' [spdep::moran.test()] instead. #' #' These functions can be used for geographic or projected coordinate reference #' systems and expect 2D data. diff --git a/R/local_geary.R b/R/local_geary.R index f75c1fe..779d171 100644 --- a/R/local_geary.R +++ b/R/local_geary.R @@ -3,6 +3,9 @@ #' Calculate the local Geary's C statistic for model residuals. #' `ww_local_geary_c()` returns the statistic itself, while #' `ww_local_geary_pvalue()` returns the associated p value. 
+#' These functions are meant to help assess model predictions, for instance by +#' identifying clusters of higher residuals than expected. For statistical +#' testing and inference applications, use [spdep::localC_perm()] instead. #' #' These functions can be used for geographic or projected coordinate reference #' systems and expect 2D data. diff --git a/R/local_getis.R b/R/local_getis.R index 0045366..52923c8 100644 --- a/R/local_getis.R +++ b/R/local_getis.R @@ -3,6 +3,9 @@ #' Calculate the local Getis-Ord G and G* statistic for model residuals. #' `ww_local_getis_ord_g()` returns the statistic itself, while #' `ww_local_getis_ord_pvalue()` returns the associated p value. +#' These functions are meant to help assess model predictions, for instance by +#' identifying clusters of higher residuals than expected. For statistical +#' testing and inference applications, use [spdep::localG_perm()] instead. #' #' These functions can be used for geographic or projected coordinate reference #' systems and expect 2D data. diff --git a/R/local_moran.R b/R/local_moran.R index 4715df4..7f81367 100644 --- a/R/local_moran.R +++ b/R/local_moran.R @@ -3,6 +3,9 @@ #' Calculate the local Moran's I statistic for model residuals. #' `ww_local_moran_i()` returns the statistic itself, while #' `ww_local_moran_pvalue()` returns the associated p value. +#' These functions are meant to help assess model predictions, for instance by +#' identifying clusters of higher residuals than expected. For statistical +#' testing and inference applications, use [spdep::localmoran_perm()] instead. #' #' These functions can be used for geographic or projected coordinate reference #' systems and expect 2D data. 
diff --git a/inst/srr_template_spatial_yardstick.R b/inst/srr_template_spatial_yardstick.R index 0e886e7..f8b5524 100644 --- a/inst/srr_template_spatial_yardstick.R +++ b/inst/srr_template_spatial_yardstick.R @@ -1,4 +1,5 @@ test_that("srr: expected failures for {{{name}}}", { + trip_dplyr_warning() worldclim_predicted <- worldclim_simulation worldclim_predicted$predicted <- predict( lm(response ~ bio2 * bio10 * bio13 * bio19, data = worldclim_simulation), @@ -226,6 +227,7 @@ test_that("srr: expected failures for {{{name}}}", { }) test_that("other generic srr standards", { + trip_dplyr_warning() skip_if_not_installed("withr") worldclim_predicted <- worldclim_simulation worldclim_predicted$predicted <- predict( diff --git a/man/global_geary_c.Rd b/man/global_geary_c.Rd index 569b7ce..451f3aa 100644 --- a/man/global_geary_c.Rd +++ b/man/global_geary_c.Rd @@ -61,6 +61,10 @@ For \verb{_vec()} functions, a single value (or NA). Calculate the global Geary's C statistic for model residuals. \code{ww_global_geary_c()} returns the statistic itself, while \code{ww_global_geary_pvalue()} returns the associated p value. +These functions are meant to help assess model predictions, for instance by +identifying if there are clusters of higher residuals than expected. For +statistical testing and inference applications, use +\code{\link[spdep:geary.test]{spdep::geary.test()}} instead. } \details{ These functions can be used for geographic or projected coordinate reference diff --git a/man/global_moran_i.Rd b/man/global_moran_i.Rd index f608c33..4be088d 100644 --- a/man/global_moran_i.Rd +++ b/man/global_moran_i.Rd @@ -61,6 +61,10 @@ For \verb{_vec()} functions, a single value (or NA). Calculate the global Moran's I statistic for model residuals. \code{ww_global_moran_i()} returns the statistic itself, while \code{ww_global_moran_pvalue()} returns the associated p value. 
+These functions are meant to help assess model predictions, for instance by +identifying if there are clusters of higher residuals than expected. For +statistical testing and inference applications, use +\code{\link[spdep:moran.test]{spdep::moran.test()}} instead. } \details{ These functions can be used for geographic or projected coordinate reference diff --git a/man/local_geary_c.Rd b/man/local_geary_c.Rd index 5b30137..4f10061 100644 --- a/man/local_geary_c.Rd +++ b/man/local_geary_c.Rd @@ -54,6 +54,9 @@ For \verb{_vec()} functions, a numeric vector of \code{length(truth)} (or NA). Calculate the local Geary's C statistic for model residuals. \code{ww_local_geary_c()} returns the statistic itself, while \code{ww_local_geary_pvalue()} returns the associated p value. +These functions are meant to help assess model predictions, for instance by +identifying clusters of higher residuals than expected. For statistical +testing and inference applications, use \code{\link[spdep:localC]{spdep::localC_perm()}} instead. } \details{ These functions can be used for geographic or projected coordinate reference diff --git a/man/local_getis_ord_g.Rd b/man/local_getis_ord_g.Rd index b676052..c6354fb 100644 --- a/man/local_getis_ord_g.Rd +++ b/man/local_getis_ord_g.Rd @@ -54,6 +54,9 @@ For \verb{_vec()} functions, a numeric vector of \code{length(truth)} (or NA). Calculate the local Getis-Ord G and G* statistic for model residuals. \code{ww_local_getis_ord_g()} returns the statistic itself, while \code{ww_local_getis_ord_pvalue()} returns the associated p value. +These functions are meant to help assess model predictions, for instance by +identifying clusters of higher residuals than expected. For statistical +testing and inference applications, use \code{\link[spdep:localG]{spdep::localG_perm()}} instead. 
} \details{ These functions can be used for geographic or projected coordinate reference diff --git a/man/local_moran_i.Rd b/man/local_moran_i.Rd index fae6fa5..eef103f 100644 --- a/man/local_moran_i.Rd +++ b/man/local_moran_i.Rd @@ -52,6 +52,9 @@ For \verb{_vec()} functions, a numeric vector of \code{length(truth)} (or NA). Calculate the local Moran's I statistic for model residuals. \code{ww_local_moran_i()} returns the statistic itself, while \code{ww_local_moran_pvalue()} returns the associated p value. +These functions are meant to help assess model predictions, for instance by +identifying clusters of higher residuals than expected. For statistical +testing and inference applications, use \code{\link[spdep:localmoran]{spdep::localmoran_perm()}} instead. } \details{ These functions can be used for geographic or projected coordinate reference diff --git a/man/worldclim_simulation.Rd b/man/worldclim_simulation.Rd index f42028f..b2fb2df 100644 --- a/man/worldclim_simulation.Rd +++ b/man/worldclim_simulation.Rd @@ -24,6 +24,6 @@ worldclim_simulation \description{ This data is adapted from the CAST vignette \code{vignette("cast02-AOA-tutorial", package = "CAST")}. -The original data is derived from the Worldclim global climate variables/ +The original data is derived from the Worldclim global climate variables. } \keyword{datasets} diff --git a/tests/helper.R b/tests/helper.R deleted file mode 100644 index e620a8d..0000000 --- a/tests/helper.R +++ /dev/null @@ -1,3 +0,0 @@ -if (identical(Sys.getenv("NOT_CRAN"), "true")) { - Sys.setenv("waywiser_test_cast" = "true") -} diff --git a/tests/testthat/_snaps/local_geary.md b/tests/testthat/_snaps/local_geary.md index d322dfe..2562809 100644 --- a/tests/testthat/_snaps/local_geary.md +++ b/tests/testthat/_snaps/local_geary.md @@ -2,13 +2,6 @@ Code df_local_c <- ww_local_geary_c(guerry_modeled, Crm_prs, predictions) - Warning - Returning more (or less) than 1 row per `summarise()` group was deprecated in dplyr 1.1.0. 
- i Please use `reframe()` instead. - i When switching from `summarise()` to `reframe()`, remember that `reframe()` always returns an ungrouped data frame and adjust accordingly. - i The deprecated feature was likely used in the yardstick package. - Please report the issue at . - Code df_local_c[1:3] Output # A tibble: 85 x 3 diff --git a/tests/testthat/_snaps/srr-ww_local_geary_c.md b/tests/testthat/_snaps/srr-ww_local_geary_c.md index 03b1c48..695342a 100644 --- a/tests/testthat/_snaps/srr-ww_local_geary_c.md +++ b/tests/testthat/_snaps/srr-ww_local_geary_c.md @@ -726,12 +726,6 @@ Code ww_local_geary_c(worldclim_simulation, response, response) - Warning - Returning more (or less) than 1 row per `summarise()` group was deprecated in dplyr 1.1.0. - i Please use `reframe()` instead. - i When switching from `summarise()` to `reframe()`, remember that `reframe()` always returns an ungrouped data frame and adjust accordingly. - i The deprecated feature was likely used in the yardstick package. - Please report the issue at . 
Output # A tibble: 10,000 x 3 .metric .estimator .estimate diff --git a/tests/testthat/helper.R b/tests/testthat/helper.R new file mode 100644 index 0000000..26cd75c --- /dev/null +++ b/tests/testthat/helper.R @@ -0,0 +1,11 @@ +if (identical(Sys.getenv("NOT_CRAN"), "true")) { + Sys.setenv("waywiser_test_cast" = "true") +} + +trip_dplyr_warning <- function() { + invisible( + suppressWarnings( + dplyr::summarise(dplyr::group_by(iris, Species), 1:2) + ) + ) +} diff --git a/tests/testthat/test-agreement_coefficient.R b/tests/testthat/test-agreement_coefficient.R index 9f26404..e5f2f22 100644 --- a/tests/testthat/test-agreement_coefficient.R +++ b/tests/testthat/test-agreement_coefficient.R @@ -1,4 +1,5 @@ test_that("agreement coefficients match Ji and Gallo", { + trip_dplyr_warning() #' @srrstats {G5.7} Algorithm performs as expected x <- c(6, 8, 9, 10, 11, 14) y <- c(2, 3, 5, 5, 6, 8) @@ -62,6 +63,7 @@ test_that("agreement coefficients match Ji and Gallo", { }) test_that("agreement coefficients are the same across methods", { + trip_dplyr_warning() x <- c(6, 8, 9, 10, 11, 14) y <- c(2, 3, 5, 5, 6, 8) df <- data.frame(x = x, y = y) diff --git a/tests/testthat/test-area_of_applicability.R b/tests/testthat/test-area_of_applicability.R index 43b0cc7..893af8e 100644 --- a/tests/testthat/test-area_of_applicability.R +++ b/tests/testthat/test-area_of_applicability.R @@ -18,6 +18,7 @@ importance <- vip::vi_permute( ) test_that("`ww_area_of_applicability` is properly classed", { + trip_dplyr_warning() model <- ww_area_of_applicability(y ~ ., train, test, importance) expect_s3_class(model, "ww_area_of_applicability") expect_s3_class(model, "hardhat_model") @@ -25,12 +26,14 @@ test_that("`ww_area_of_applicability` is properly classed", { test_that("`ww_area_of_applicability` is not defined for vectors", { + trip_dplyr_warning() expect_snapshot_error( ww_area_of_applicability(mtcars$mpg) ) }) test_that("`ww_area_of_applicability` finds 0 distance between identical data", { + 
trip_dplyr_warning() #' @srrstats {G3.0} Testing with appropriate tolerances. expect_equal( suppressWarnings( @@ -43,7 +46,7 @@ test_that("`ww_area_of_applicability` finds 0 distance between identical data", }) test_that("`ww_area_of_applicability` works with or without a testing set", { - + trip_dplyr_warning() expect_error( ww_area_of_applicability(y ~ ., train, test, importance), NA @@ -57,7 +60,7 @@ test_that("`ww_area_of_applicability` works with or without a testing set", { }) test_that("`ww_area_of_applicability` methods are equivalent", { - + trip_dplyr_warning() methods <- list( ww_area_of_applicability(y ~ ., train, test, importance), ww_area_of_applicability(train[2:11], test[2:11], importance), @@ -117,7 +120,7 @@ test_that("`ww_area_of_applicability` methods are equivalent", { }) test_that("`ww_area_of_applicability` can handle different column orders", { - + trip_dplyr_warning() #' @srrstats {G3.0} Testing with appropriate tolerances. expect_equal( ww_area_of_applicability(train[2:11], test[2:11], importance)$aoa_threshold, @@ -133,7 +136,7 @@ test_that("`ww_area_of_applicability` can handle different column orders", { }) test_that("NAs are handled", { - + trip_dplyr_warning() train[1, 2] <- NA test[1, 2] <- NA comb_rset <- rsample::make_splits(train, test) @@ -216,6 +219,7 @@ test_that("NAs are handled", { }) test_that("Expected errors", { + trip_dplyr_warning() expect_snapshot( ww_area_of_applicability(y ~ ., train, test[1:10], importance), error = TRUE @@ -252,7 +256,7 @@ importance <- vip::vi_permute( aoa <- ww_area_of_applicability(y ~ ., train, test, importance) test_that("normal use", { - + trip_dplyr_warning() expect_snapshot( predict(aoa, test) ) @@ -265,6 +269,7 @@ test_that("normal use", { }) test_that("`new_ww_area_of_applicability` arguments are assigned correctly", { + trip_dplyr_warning() x <- ww_area_of_applicability(y ~ ., train, test, importance) skip_on_os("mac") @@ -284,6 +289,7 @@ test_that("`new_ww_area_of_applicability` 
arguments are assigned correctly", { #' @srrstats {G5.4c} Data is derived originally from CAST and associated paper test_that("ww_area_of_applicability() is close-enough to CAST", { skip_on_cran() + trip_dplyr_warning() #' @srrstats {SP6.2} Testing with ~global data relevant_data <- head(as.data.frame(worldclim_simulation)[c(1:4, 6)], 1000) @@ -331,6 +337,7 @@ test_that("ww_area_of_applicability() is close-enough to CAST", { }) test_that("loaded data is equivalent", { + trip_dplyr_warning() importance <- data.frame( term = c("bio2", "bio10", "bio13", "bio19"), estimate = c(50.68727, 57.66859, 62.81009, 48.72391) diff --git a/tests/testthat/test-global_geary.R b/tests/testthat/test-global_geary.R index 4588d46..5c10810 100644 --- a/tests/testthat/test-global_geary.R +++ b/tests/testthat/test-global_geary.R @@ -1,5 +1,5 @@ test_that("Global Geary statistics are stable", { - + trip_dplyr_warning() guerry_modeled <- guerry guerry_lm <- lm(Crm_prs ~ Litercy, guerry_modeled) guerry_modeled$predictions <- predict(guerry_lm, guerry_modeled) diff --git a/tests/testthat/test-global_moran.R b/tests/testthat/test-global_moran.R index 1af4f04..5a36866 100644 --- a/tests/testthat/test-global_moran.R +++ b/tests/testthat/test-global_moran.R @@ -1,5 +1,5 @@ test_that("Global Moran statistics are stable", { - + trip_dplyr_warning() guerry_modeled <- guerry guerry_lm <- lm(Crm_prs ~ Litercy, guerry_modeled) guerry_modeled$predictions <- predict(guerry_lm, guerry_modeled) diff --git a/tests/testthat/test-local_geary.R b/tests/testthat/test-local_geary.R index ffdceeb..f4621a1 100644 --- a/tests/testthat/test-local_geary.R +++ b/tests/testthat/test-local_geary.R @@ -1,4 +1,5 @@ test_that("Local geary statistics are stable", { + trip_dplyr_warning() set.seed(123) guerry_modeled <- guerry diff --git a/tests/testthat/test-local_getis.R b/tests/testthat/test-local_getis.R index 2d8603b..3ffaeb5 100644 --- a/tests/testthat/test-local_getis.R +++ b/tests/testthat/test-local_getis.R @@ -1,6 
+1,6 @@ set.seed(123) test_that("Local Getis-Ord statistics are stable", { - + trip_dplyr_warning() guerry_modeled <- guerry guerry_lm <- lm(Crm_prs ~ Litercy, guerry_modeled) guerry_modeled$predictions <- predict(guerry_lm, guerry_modeled) @@ -61,7 +61,7 @@ test_that("Local Getis-Ord statistics are stable", { }) test_that("Local Getis-Ord statistics are stable", { - + trip_dplyr_warning() guerry_modeled <- guerry guerry_lm <- lm(Crm_prs ~ Litercy, guerry_modeled) guerry_modeled$predictions <- predict(guerry_lm, guerry_modeled) diff --git a/tests/testthat/test-local_moran.R b/tests/testthat/test-local_moran.R index ff8f17d..3b8f750 100644 --- a/tests/testthat/test-local_moran.R +++ b/tests/testthat/test-local_moran.R @@ -1,6 +1,6 @@ set.seed(123) test_that("Local Moran statistics are stable", { - + trip_dplyr_warning() guerry_modeled <- guerry guerry_lm <- lm(Crm_prs ~ Litercy, guerry_modeled) guerry_modeled$predictions <- predict(guerry_lm, guerry_modeled) diff --git a/tests/testthat/test-misc.R b/tests/testthat/test-misc.R index 13ed5dc..a6c51a2 100644 --- a/tests/testthat/test-misc.R +++ b/tests/testthat/test-misc.R @@ -1,5 +1,5 @@ test_that("ww_make_point_neighbors is stable", { - + trip_dplyr_warning() guerry_pt <- sf::st_geometry(guerry) guerry_pt <- sf::st_centroid(guerry_pt) expect_snapshot( @@ -13,7 +13,7 @@ test_that("ww_make_point_neighbors is stable", { }) test_that("ww_make_polygon_neighbors is stable", { - + trip_dplyr_warning() expect_snapshot( ww_make_polygon_neighbors(guerry) ) @@ -25,7 +25,7 @@ test_that("ww_make_polygon_neighbors is stable", { }) test_that("ww_build_neighbors is stable", { - + trip_dplyr_warning() expect_snapshot( ww_build_neighbors(guerry) ) @@ -53,7 +53,7 @@ test_that("ww_build_neighbors is stable", { }) test_that("ww_build_weights is stable", { - + trip_dplyr_warning() expect_snapshot( ww_build_weights(guerry) ) @@ -85,6 +85,7 @@ test_that("ww_build_weights is stable", { }) test_that("expected_errors", { + trip_dplyr_warning() 
expect_snapshot( ww_agreement_coefficient_vec(1:5, c(1:4, NA), na_action = c(na.omit, na.pass)), error = TRUE diff --git a/tests/testthat/test-misc_yardstick.R b/tests/testthat/test-misc_yardstick.R index 06c8d7b..a73befe 100644 --- a/tests/testthat/test-misc_yardstick.R +++ b/tests/testthat/test-misc_yardstick.R @@ -1,4 +1,5 @@ test_that("passing functions to build weights", { + trip_dplyr_warning() guerry_modeled <- guerry guerry_lm <- lm(Crm_prs ~ Litercy, guerry_modeled) guerry_modeled$predictions <- predict(guerry_lm, guerry_modeled) @@ -12,6 +13,7 @@ test_that("passing functions to build weights", { }) test_that("edge cases", { + trip_dplyr_warning() guerry_modeled <- guerry guerry_lm <- lm(Crm_prs ~ Litercy, guerry_modeled) guerry_modeled$predictions <- predict(guerry_lm, guerry_modeled) diff --git a/tests/testthat/test-multi_scale.R b/tests/testthat/test-multi_scale.R index c92102d..cf62b63 100644 --- a/tests/testthat/test-multi_scale.R +++ b/tests/testthat/test-multi_scale.R @@ -1,5 +1,6 @@ test_that("ww_multi_scale", { skip_if_not_installed("modeldata") + trip_dplyr_warning() data(ames, package = "modeldata") ames_sf <- sf::st_as_sf(ames, coords = c("Longitude", "Latitude"), crs = 4326) ames_model <- lm(Sale_Price ~ Lot_Area, data = ames_sf) @@ -78,6 +79,7 @@ test_that("ww_multi_scale", { }) test_that("expected errors", { + trip_dplyr_warning() guerry_modeled <- guerry guerry_lm <- lm(Crm_prs ~ Litercy, guerry_modeled) guerry_modeled$predictions <- predict(guerry_lm, guerry_modeled) @@ -119,6 +121,7 @@ test_that("expected errors", { }) test_that("srr: expected failures for ww_multi_scale", { + trip_dplyr_warning() worldclim_predicted <- worldclim_simulation worldclim_predicted$predicted <- predict( lm(response ~ bio2 * bio10 * bio13 * bio19, data = worldclim_simulation), @@ -233,6 +236,7 @@ test_that("srr: expected failures for ww_multi_scale", { test_that("other generic srr standards", { skip_if_not_installed("withr") + trip_dplyr_warning() 
worldclim_predicted <- worldclim_simulation worldclim_predicted$predicted <- predict( lm(response ~ bio2 * bio10 * bio13 * bio19, data = worldclim_simulation), diff --git a/tests/testthat/test-srr-ww_global_geary_c.R b/tests/testthat/test-srr-ww_global_geary_c.R index 60e3a78..c529409 100644 --- a/tests/testthat/test-srr-ww_global_geary_c.R +++ b/tests/testthat/test-srr-ww_global_geary_c.R @@ -2,6 +2,7 @@ # Please edit inst/srr_template_spatial_yardstick.R instead test_that("srr: expected failures for ww_global_geary_c", { + trip_dplyr_warning() worldclim_predicted <- worldclim_simulation worldclim_predicted$predicted <- predict( lm(response ~ bio2 * bio10 * bio13 * bio19, data = worldclim_simulation), @@ -229,6 +230,7 @@ test_that("srr: expected failures for ww_global_geary_c", { }) test_that("other generic srr standards", { + trip_dplyr_warning() skip_if_not_installed("withr") worldclim_predicted <- worldclim_simulation worldclim_predicted$predicted <- predict( diff --git a/tests/testthat/test-srr-ww_global_geary_pvalue.R b/tests/testthat/test-srr-ww_global_geary_pvalue.R index b177a53..63eb5f8 100644 --- a/tests/testthat/test-srr-ww_global_geary_pvalue.R +++ b/tests/testthat/test-srr-ww_global_geary_pvalue.R @@ -2,6 +2,7 @@ # Please edit inst/srr_template_spatial_yardstick.R instead test_that("srr: expected failures for ww_global_geary_pvalue", { + trip_dplyr_warning() worldclim_predicted <- worldclim_simulation worldclim_predicted$predicted <- predict( lm(response ~ bio2 * bio10 * bio13 * bio19, data = worldclim_simulation), @@ -229,6 +230,7 @@ test_that("srr: expected failures for ww_global_geary_pvalue", { }) test_that("other generic srr standards", { + trip_dplyr_warning() skip_if_not_installed("withr") worldclim_predicted <- worldclim_simulation worldclim_predicted$predicted <- predict( diff --git a/tests/testthat/test-srr-ww_global_moran_i.R b/tests/testthat/test-srr-ww_global_moran_i.R index c4ca3b3..08d72b9 100644 --- 
a/tests/testthat/test-srr-ww_global_moran_i.R +++ b/tests/testthat/test-srr-ww_global_moran_i.R @@ -2,6 +2,7 @@ # Please edit inst/srr_template_spatial_yardstick.R instead test_that("srr: expected failures for ww_global_moran_i", { + trip_dplyr_warning() worldclim_predicted <- worldclim_simulation worldclim_predicted$predicted <- predict( lm(response ~ bio2 * bio10 * bio13 * bio19, data = worldclim_simulation), @@ -229,6 +230,7 @@ test_that("srr: expected failures for ww_global_moran_i", { }) test_that("other generic srr standards", { + trip_dplyr_warning() skip_if_not_installed("withr") worldclim_predicted <- worldclim_simulation worldclim_predicted$predicted <- predict( diff --git a/tests/testthat/test-srr-ww_global_moran_pvalue.R b/tests/testthat/test-srr-ww_global_moran_pvalue.R index e782725..adaa8ce 100644 --- a/tests/testthat/test-srr-ww_global_moran_pvalue.R +++ b/tests/testthat/test-srr-ww_global_moran_pvalue.R @@ -2,6 +2,7 @@ # Please edit inst/srr_template_spatial_yardstick.R instead test_that("srr: expected failures for ww_global_moran_pvalue", { + trip_dplyr_warning() worldclim_predicted <- worldclim_simulation worldclim_predicted$predicted <- predict( lm(response ~ bio2 * bio10 * bio13 * bio19, data = worldclim_simulation), @@ -229,6 +230,7 @@ test_that("srr: expected failures for ww_global_moran_pvalue", { }) test_that("other generic srr standards", { + trip_dplyr_warning() skip_if_not_installed("withr") worldclim_predicted <- worldclim_simulation worldclim_predicted$predicted <- predict( diff --git a/tests/testthat/test-srr-ww_local_geary_c.R b/tests/testthat/test-srr-ww_local_geary_c.R index 91b425e..321bb23 100644 --- a/tests/testthat/test-srr-ww_local_geary_c.R +++ b/tests/testthat/test-srr-ww_local_geary_c.R @@ -2,6 +2,7 @@ # Please edit inst/srr_template_spatial_yardstick.R instead test_that("srr: expected failures for ww_local_geary_c", { + trip_dplyr_warning() worldclim_predicted <- worldclim_simulation worldclim_predicted$predicted <- 
predict( lm(response ~ bio2 * bio10 * bio13 * bio19, data = worldclim_simulation), @@ -229,6 +230,7 @@ test_that("srr: expected failures for ww_local_geary_c", { }) test_that("other generic srr standards", { + trip_dplyr_warning() skip_if_not_installed("withr") worldclim_predicted <- worldclim_simulation worldclim_predicted$predicted <- predict( diff --git a/tests/testthat/test-srr-ww_local_geary_pvalue.R b/tests/testthat/test-srr-ww_local_geary_pvalue.R index 2cf4e7b..6e13493 100644 --- a/tests/testthat/test-srr-ww_local_geary_pvalue.R +++ b/tests/testthat/test-srr-ww_local_geary_pvalue.R @@ -2,6 +2,7 @@ # Please edit inst/srr_template_spatial_yardstick.R instead test_that("srr: expected failures for ww_local_geary_pvalue", { + trip_dplyr_warning() worldclim_predicted <- worldclim_simulation worldclim_predicted$predicted <- predict( lm(response ~ bio2 * bio10 * bio13 * bio19, data = worldclim_simulation), @@ -229,6 +230,7 @@ test_that("srr: expected failures for ww_local_geary_pvalue", { }) test_that("other generic srr standards", { + trip_dplyr_warning() skip_if_not_installed("withr") worldclim_predicted <- worldclim_simulation worldclim_predicted$predicted <- predict( diff --git a/tests/testthat/test-srr-ww_local_getis_ord_g.R b/tests/testthat/test-srr-ww_local_getis_ord_g.R index e9ef34e..81bfbaa 100644 --- a/tests/testthat/test-srr-ww_local_getis_ord_g.R +++ b/tests/testthat/test-srr-ww_local_getis_ord_g.R @@ -2,6 +2,7 @@ # Please edit inst/srr_template_spatial_yardstick.R instead test_that("srr: expected failures for ww_local_getis_ord_g", { + trip_dplyr_warning() worldclim_predicted <- worldclim_simulation worldclim_predicted$predicted <- predict( lm(response ~ bio2 * bio10 * bio13 * bio19, data = worldclim_simulation), @@ -229,6 +230,7 @@ test_that("srr: expected failures for ww_local_getis_ord_g", { }) test_that("other generic srr standards", { + trip_dplyr_warning() skip_if_not_installed("withr") worldclim_predicted <- worldclim_simulation 
worldclim_predicted$predicted <- predict( diff --git a/tests/testthat/test-srr-ww_local_getis_ord_g_pvalue.R b/tests/testthat/test-srr-ww_local_getis_ord_g_pvalue.R index cd2f2ef..7488807 100644 --- a/tests/testthat/test-srr-ww_local_getis_ord_g_pvalue.R +++ b/tests/testthat/test-srr-ww_local_getis_ord_g_pvalue.R @@ -2,6 +2,7 @@ # Please edit inst/srr_template_spatial_yardstick.R instead test_that("srr: expected failures for ww_local_getis_ord_g_pvalue", { + trip_dplyr_warning() worldclim_predicted <- worldclim_simulation worldclim_predicted$predicted <- predict( lm(response ~ bio2 * bio10 * bio13 * bio19, data = worldclim_simulation), @@ -229,6 +230,7 @@ test_that("srr: expected failures for ww_local_getis_ord_g_pvalue", { }) test_that("other generic srr standards", { + trip_dplyr_warning() skip_if_not_installed("withr") worldclim_predicted <- worldclim_simulation worldclim_predicted$predicted <- predict( diff --git a/tests/testthat/test-srr-ww_local_moran_i.R b/tests/testthat/test-srr-ww_local_moran_i.R index c7f7bae..9ae6df9 100644 --- a/tests/testthat/test-srr-ww_local_moran_i.R +++ b/tests/testthat/test-srr-ww_local_moran_i.R @@ -2,6 +2,7 @@ # Please edit inst/srr_template_spatial_yardstick.R instead test_that("srr: expected failures for ww_local_moran_i", { + trip_dplyr_warning() worldclim_predicted <- worldclim_simulation worldclim_predicted$predicted <- predict( lm(response ~ bio2 * bio10 * bio13 * bio19, data = worldclim_simulation), @@ -229,6 +230,7 @@ test_that("srr: expected failures for ww_local_moran_i", { }) test_that("other generic srr standards", { + trip_dplyr_warning() skip_if_not_installed("withr") worldclim_predicted <- worldclim_simulation worldclim_predicted$predicted <- predict( diff --git a/tests/testthat/test-srr-ww_local_moran_pvalue.R b/tests/testthat/test-srr-ww_local_moran_pvalue.R index ca45e4f..8db13bc 100644 --- a/tests/testthat/test-srr-ww_local_moran_pvalue.R +++ b/tests/testthat/test-srr-ww_local_moran_pvalue.R @@ -2,6 +2,7 @@ # 
Please edit inst/srr_template_spatial_yardstick.R instead test_that("srr: expected failures for ww_local_moran_pvalue", { + trip_dplyr_warning() worldclim_predicted <- worldclim_simulation worldclim_predicted$predicted <- predict( lm(response ~ bio2 * bio10 * bio13 * bio19, data = worldclim_simulation), @@ -229,6 +230,7 @@ test_that("srr: expected failures for ww_local_moran_pvalue", { }) test_that("other generic srr standards", { + trip_dplyr_warning() skip_if_not_installed("withr") worldclim_predicted <- worldclim_simulation worldclim_predicted$predicted <- predict( diff --git a/vignettes/articles/Feature-Matrix.Rmd b/vignettes/articles/Feature-Matrix.Rmd new file mode 100644 index 0000000..349d9b1 --- /dev/null +++ b/vignettes/articles/Feature-Matrix.Rmd @@ -0,0 +1,37 @@ +--- +title: "Feature Matrix" +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +## Feature Matrix + +Below is a table of the major statistical functions waywiser provides, alongside +brief descriptions and journal references for each. For a complete list of +package functions, check out the [package reference page](https://mikemahoney218.github.io/waywiser/reference/index.html). + +```{r echo=FALSE} +funs <- tibble::tribble( + ~ "Function", ~ "Description", ~ "Category", ~ "Reference", + "ww_area_of_applicability()", "Calculate the 'area of applicability' of a model", "AOA", kableExtra::text_spec("H. Meyer and E. Pebesma. 2021. Predicting into unknown space? Estimating the area of applicability of spatial prediction models, Methods in Ecology and Evolution 12(9), pp 1620 - 1633, doi: 10.1111/2041-210X.13650.", link = "https://doi.org/10.1111/2041-210X.13650"), + "ww_multi_scale()", "Calculate model metrics for predictions aggregated to multiple levels", "Multi-scale assessment", kableExtra::text_spec("Riemann, R., Wilson, B. T., Lister, A., and Parks, S.
(2010) An effective assessment protocol for continuous geospatial datasets of forest characteristics using USFS Forest Inventory and Analysis (FIA) data. Remote Sensing of Environment 114(10), pp 2337-2352, doi: 10.1016/j.rse.2010.05.010.", link = "https://doi.org/10.1016/j.rse.2010.05.010"), + "ww_global_moran_i() (plus _vec and _pvalue variants)", "Calculate Moran's I", "Spatial yardstick metrics", paste(kableExtra::text_spec("Moran, P.A.P. (1950). Notes on Continuous Stochastic Phenomena. Biometrika, 37(1/2), pp 17. doi: 10.2307/2332142", link = "https://doi.org/10.2307/2332142"), "Cliff, A. D., Ord, J. K. 1981 Spatial processes, Pion, p. 17.", sep = "; "), + "ww_local_moran_i() (plus _vec and _pvalue variants)", "Calculate local Moran's I (Anselin's LISA)", "Spatial yardstick metrics", paste(kableExtra::text_spec("Anselin, L. 1995. Local indicators of spatial association, Geographical Analysis, 27, pp 93–115. doi: 10.1111/j.1538-4632.1995.tb00338.x", link = "https://doi.org/10.1111/j.1538-4632.1995.tb00338.x"), kableExtra::text_spec("Sokal, R. R, Oden, N. L. and Thomson, B. A. 1998. Local Spatial Autocorrelation in a Biological Model. Geographical Analysis, 30, pp 331–354. doi: 10.1111/j.1538-4632.1998.tb00406.x", link = "https://doi.org/10.1111/j.1538-4632.1998.tb00406.x"), sep = "; "), + "ww_global_geary_c() (plus _vec and _pvalue variants)", "Calculate Geary's C", "Spatial yardstick metrics", paste(kableExtra::text_spec("Geary, R. C. (1954). The Contiguity Ratio and Statistical Mapping. The Incorporated Statistician. 5 (3): 115–145. doi: 10.2307/2986645", link = "https://doi.org/10.2307/2986645"), "Cliff, A. D., Ord, J. K. 1981 Spatial processes, Pion, p.
17.", sep = "; "), + "ww_local_geary_c() (plus _vec and _pvalue variants)", "Calculate local Geary's C", "Spatial yardstick metrics", paste(kableExtra::text_spec("Anselin, L. 1995. Local indicators of spatial association, Geographical Analysis, 27, pp 93–115. doi: 10.1111/j.1538-4632.1995.tb00338.x", link = "https://doi.org/10.1111/j.1538-4632.1995.tb00338.x"), kableExtra::text_spec("Anselin, L. 2019. A Local Indicator of Multivariate Spatial Association: Extending Geary's C. Geographical Analysis, 51, pp 133-150. doi: 10.1111/gean.12164", link = "https://doi.org/10.1111/gean.12164"), sep = "; "), + "ww_local_getis_ord_g() (plus _vec and _pvalue variants)", "Calculate local Getis-Ord G and G*", "Spatial yardstick metrics", kableExtra::text_spec("Ord, J. K. and Getis, A. 1995. Local spatial autocorrelation statistics: distributional issues and an application. Geographical Analysis, 27, 286–306. doi: 10.1111/j.1538-4632.1995.tb00912.x", link = "https://doi.org/10.1111/j.1538-4632.1995.tb00912.x"), + "ww_agreement_coefficient(), ww_systematic_agreement_coefficient(), ww_systematic_mpd(), ww_systematic_rmpd() (plus unsystematic and _vec variants)", "Calculate agreement coefficients and related metrics", "Yardstick metrics", kableExtra::text_spec("Ji, L. and Gallo, K. 2006. An Agreement Coefficient for Image Comparison. Photogrammetric Engineering & Remote Sensing 72(7), pp 823–833, doi: 10.14358/PERS.72.7.823.", link = "https://doi.org/10.14358/PERS.72.7.823"), + "ww_willmott_d(), ww_willmott_dr(), ww_systematic_mse(), ww_systematic_rmse() (plus unsystematic and _vec variants)", "Calculate Willmott's d and related metrics", "Yardstick metrics", paste(kableExtra::text_spec("Willmott, C. J. 1981. On the Validation of Models. Physical Geography 2(2), pp 184-194, doi: 10.1080/02723646.1981.10642213", link = "https://doi.org/10.1080/02723646.1981.10642213"), kableExtra::text_spec("Willmott, C. J. 1982. Some Comments on the Evaluation of Model Performance. Bulletin of the American Meteorological Society 63(11), pp 1309-1313, doi: 10.1175/1520-0477(1982)063<1309:SCOTEO>2.0.CO;2", link = "https://doi.org/10.1175/1520-0477(1982)063%3C1309:SCOTEO%3E2.0.CO;2"),
kableExtra::text_spec("Willmott, C. J., Robeson, S. M., and Matsuura, K. A refined index of model performance. International Journal of Climatology 32, pp 2088-2094, doi: 10.1002/joc.2419", link = "https://doi.org/10.1002/joc.2419"), sep = "; "), +) + +# text_spec() returns raw HTML, so kbl() must not escape the cells again +kableExtra::kable_styling( + kableExtra::kbl(funs, escape = FALSE), + bootstrap_options = c("striped", "hover") +) +``` diff --git a/vignettes/residual-autocorrelation.Rmd b/vignettes/residual-autocorrelation.Rmd index 0ab24a3..1e9b29a 100644 --- a/vignettes/residual-autocorrelation.Rmd +++ b/vignettes/residual-autocorrelation.Rmd @@ -14,6 +14,43 @@ knitr::opts_chunk$set( ) ``` +Perhaps the most famous sentence in spatial analysis is Tobler's first law of +geography, from [Tobler (1970)](https://doi.org/10.2307/143141): "Everything is +related to everything else, but near things are more related than distant +things." Spatial data often exhibits spatial autocorrelation, where variables of +interest are not distributed at random but rather exhibit spatial patterns; in +particular, spatial data is often clustered (exhibiting positive spatial +autocorrelation) such that locations near each other are _more similar_ than +you'd expect if you had just sampled two observations at random. + +For some data, this makes intuitive sense. The elevation at two neighboring +points is extremely likely to be similar, as is the precipitation and +temperature; these are variables whose values depend on (among other things) +your position on the Earth. However, the first law is often over-interpreted.
+[Pebesma and Bivand (2022)](https://r-spatial.org/book/15-Measures.html) present +an interesting discussion of the "first law", quoting +[Olsson (1970)](https://doi.org/10.2307/143140) who says: + +> [T]he fact that the autocorrelations seem to hide systematic specification errors suggests that the elevation of this statement to the status of ‘the first law of geography’ is at best premature. At worst, the statement may represent the spatial variant of the post hoc fallacy, which would mean that coincidence has been mistaken for a causal relation. + +Oftentimes, finding spatial autocorrelation in a variable is a result of that +variable depending on _other_ variables, which may or may not be spatially +dependent themselves. For instance, house prices often exhibit positive +autocorrelation, not because home prices are determined by their relative +position on Earth, but because house prices rely upon other variables -- school +zones, median income, housing availability and more -- which may themselves +be spatially autocorrelated. + +For that reason, it's often worthwhile to look at the spatial autocorrelation of +model residuals, to see if your model makes more errors in certain regions than +you'd expect if errors were randomly arranged. That can help you to identify +misspecifications in your model: seeing large autocorrelations in model +residuals in an area might suggest that you're missing variables in your model, +and knowing which areas your model does worse in can help you to identify what +those variables might be. Even if you can't fix your model, it's often useful to +identify regions your model does notably worse in, so that you can communicate +that to whoever winds up using your predictions. + Let's walk through how we can use waywiser to find local indicators of spatial autocorrelation for a very simple model. 
First things first, let's load a few libraries: ```{r message=FALSE} @@ -93,4 +130,4 @@ guerry %>% ) ``` -This makes it easy to see what areas are poorly represented by our model, which might lead us to identify ways to improve our model or help us identify caveats and limitations of the models we're working with. +This makes it easy to see what areas are poorly represented by our model (which have the highest local Moran values), which might lead us to identify ways to improve our model or help us identify caveats and limitations of the models we're working with. diff --git a/vignettes/waywiser.Rmd b/vignettes/waywiser.Rmd index 892d6d7..6d47fdb 100644 --- a/vignettes/waywiser.Rmd +++ b/vignettes/waywiser.Rmd @@ -11,7 +11,7 @@ vignette: > knitr::opts_chunk$set( collapse = TRUE, comment = "#>", - eval = rlang::is_installed("vip") && rlang::is_installed("kableExtra") + eval = rlang::is_installed("vip") && rlang::is_installed("ggplot2") ) ``` @@ -30,22 +30,31 @@ things: extending this tool to work with tidymodels infrastructure. This vignette will walk through each of these goals in turn. Before we do that, -let's set up the data we'll use in examples: +let's set up the data we'll use in examples. 
We'll be using simulated data +based on [Worldclim](https://www.worldclim.org/) variables; our predictors here +represent temperature and precipitation values at sampled locations, while +our response represents a virtual species distribution: ```{r setup} library(waywiser) +set.seed(1107) + +worldclim_training <- sample(nrow(worldclim_simulation), floor(0.8 * nrow(worldclim_simulation))) +worldclim_testing <- worldclim_simulation[-worldclim_training, ] +worldclim_training <- worldclim_simulation[worldclim_training, ] + worldclim_model <- lm( response ~ bio2 + bio10 + bio13 + bio19, - worldclim_simulation + worldclim_training ) -worldclim_simulation$predictions <- predict( +worldclim_testing$predictions <- predict( worldclim_model, - worldclim_simulation + worldclim_testing ) -head(worldclim_simulation) +head(worldclim_testing) ``` ## Yardstick Extensions @@ -61,14 +70,14 @@ coefficient from [Ji and Gallo 2006](https://doi.org/10.14358/PERS.72.7.823): ```{r} ww_agreement_coefficient( - worldclim_simulation, + worldclim_testing, truth = response, estimate = predictions ) ww_agreement_coefficient_vec( - truth = worldclim_simulation$response, - estimate = worldclim_simulation$predictions + truth = worldclim_testing$response, + estimate = worldclim_testing$predictions ) ``` @@ -77,7 +86,7 @@ Some of these additional metrics are implemented by wrapping functions from the ```{r} ww_global_geary_c( - worldclim_simulation, + worldclim_testing, truth = response, estimate = predictions ) @@ -91,14 +100,14 @@ or a function that will create spatial weights: ```{r} ww_global_geary_c( - worldclim_simulation, + worldclim_testing, truth = response, estimate = predictions, - wt = ww_build_weights(worldclim_simulation) + wt = ww_build_weights(worldclim_testing) ) ww_global_geary_c( - worldclim_simulation, + worldclim_testing, truth = response, estimate = predictions, wt = ww_build_weights @@ -112,7 +121,7 @@ Because these are yardstick metrics, they can be used with yardstick::metric_set( ww_agreement_coefficient,
ww_global_geary_c -)(worldclim_simulation, +)(worldclim_testing, truth = response, estimate = predictions) ``` @@ -129,22 +138,35 @@ recommend assessing model predictions aggregated to multiple scales. The `ww_multi_scale()` function helps automate this process. The interface for this function works similarly to that for yardstick metrics -- you provide your data, your true values, and your estimate -- except you also must provide -instructions for how to aggregate your data. This can take the form of providing -information on how to build standardized grids, by passing arguments that will -be used by `sf::st_make_grid()`, or by passing polygons directly: +instructions for how to aggregate your data. You can do this by passing +arguments that will be used by `sf::st_make_grid()`; for instance, we can use +the `n` argument to control how many polygons our grid has in the x and y +directions. + +Note that each element of the argument vector is used to make a separate grid -- +so, for instance, passing `n = c(2, 4)` will result in one 2-by-2 grid and one +4-by-4 grid, because `n[[1]]` is 2 and `n[[2]]` is 4.
If we actually wanted to +create a 2-by-4 grid, by passing `sf::st_make_grid()` the argument +`n = c(2, 4)`, we need to wrap that vector in a list so that running `n[[1]]` +returns `c(2, 4)`: ```{r} ww_multi_scale( - worldclim_simulation, + worldclim_testing, truth = response, estimate = predictions, metrics = list(ww_agreement_coefficient, yardstick::rmse), - n = list(2, 4) + n = list(c(2, 4)) ) +``` + +You can also pass polygons directly, if you have pre-defined grids you'd like +to use: -grid <- sf::st_make_grid(worldclim_simulation, n = 2) +```{r} +grid <- sf::st_make_grid(worldclim_testing, n = c(2, 4)) ww_multi_scale( - worldclim_simulation, + worldclim_testing, truth = response, estimate = predictions, metrics = list(ww_agreement_coefficient, yardstick::rmse), @@ -157,45 +179,55 @@ ww_multi_scale( Last but not least, we can also see if there's any areas in our data that are too different from our training data for us to safely predict on, which fall outside the "area of applicability" defined by -[Meyer and Pebesma (2021)](https://doi.org/10.1111/2041-210X.13650): +[Meyer and Pebesma (2021)](https://doi.org/10.1111/2041-210X.13650). This +approach looks at how similar the predictor values of new data are to the data +you used to train your model, with each predictor weighted by how important it is +to your model. + +In order to calculate your area of applicability, you can pass +`ww_area_of_applicability()` information about which of your variables are used +as predictors in your model, your training data, and the importance scores for +each of your variables.
Out of the box, waywiser should work with any of the +importance score-calculating functions from the vip package: ```{r} worldclim_aoa <- ww_area_of_applicability( response ~ bio2 + bio10 + bio13 + bio19, - worldclim_simulation, + worldclim_training, importance = vip::vi_model(worldclim_model) ) worldclim_aoa ``` +You can also pass a data.frame with columns named "term" and "estimate" +(containing the name of each term, or predictor, and their estimated importance) +rather than using the vip package if that's more convenient. + The objects returned by `ww_area_of_applicability()` are models in their own right, which can be used by functions such as `predict()` to calculate if new observations are in the area of applicability of a model. -## Feature Matrix - -Below is a table of the major statistical functions waywiser provides, alongside -brief descriptions and journal references for each. For a complete list of -package functions, check out the [package reference page](https://mikemahoney218.github.io/waywiser/reference/index.html). - -```{r echo=FALSE} -funs <- tibble::tribble( - ~ "Function", ~ "Description", ~ "Category", ~ "Reference", - "ww_area_of_applicability()", "Calculate the 'area of applicability' of a model", "AOA", "H. Meyer and E. Pebesma. 2021. Predicting into unknown space? Estimating the area of applicability of spatial prediction models, Methods in Ecology and Evolution 12(9), pp 1620 - 1633, doi: 10.1111/2041-210X.13650.", - "ww_multi_scale()", "Calculate model metrics for predictions aggregated to multiple levels", "Multi-scale assessment", "Riemann, R., Wilson, B. T., Lister, A., and Parks, S. (2010) An effective assessment protocol for continuous geospatial datasets of forest characteristics using USFS Forest Inventory and Analysis (FIA) data. 
Remote Sensing of Environment 114(10), pp 2337-2352, doi: 10.1016/j.rse.2010.05.010.", - "ww_global_moran_i() (plus _vec and _pvalue variants)", "Calculate Moran's I", "Spatial yardstick metrics", "Moran, P.A.P. (1950). Notes on Continuous Stochastic Phenomena. Biometrika, 37(1/2), pp 17. doi: 10.2307/2332142; Cliff, A. D., Ord, J. K. 1981 Spatial processes, Pion, p. 17.", - "ww_local_moran_i() (plus _vec and _pvalue variants)", "Calculate local Moran's I (Anselin's LISA)", "Spatial yardstick metrics", "Anselin, L. 1995. Local indicators of spatial association, Geographical Analysis, 27, pp 93–115. doi: 10.1111/j.1538-4632.1995.tb00338.x. Sokal, R. R, Oden, N. L. and Thomson, B. A. 1998. Local Spatial Autocorrelation in a Biological Model. Geographical Analysis, 30, pp 331–354. doi: 10.1111/j.1538-4632.1998.tb00406.x", - "ww_global_geary_c() (plus _vec and _pvalue variants)", "Calculate Geary's C", "Spatial yardstick metrics", "Geary, R. C. (1954). The Contiguity Ratio and Statistical Mapping. The Incorporated Statistician. 5 (3): 115–145. doi: 10.2307/2986645; Cliff, A. D., Ord, J. K. 1981 Spatial processes, Pion, p. 17.", - "ww_local_geary_c() (plus _vec and _pvalue variants)", "Calculate local Geary's C", "Spatial yardstick metrics", "Anselin, L. 1995. Local indicators of spatial association, Geographical Analysis, 27, pp 93–115. doi: 10.1111/j.1538-4632.1995.tb00338.x. Anselin, L. 2019. A Local Indicator of Multivariate Spatial Association: Extending Geary's C. Geographical Analysis, 51, pp 133-150. doi: 10.1111/gean.12164", - "ww_local_getis_ord_g() (plus _vec and _pvalue variants)", "Calculate local Getis-Ord G and G*", "Spatial yardstick metrics", "Ord, J. K. and Getis, A. 1995. Local spatial autocorrelation statistics: distributional issues and an application. Geographical Analysis, 27, 286–306. 
doi: 10.1111/j.1538-4632.1995.tb00912.x", - "ww_agreement_coefficient(), ww_systematic_agreement_coefficient(), ww_systematic_mpd(), ww_systematic_rmpd() (plus unsystematic and _vec variants)", "Calculate agreement coefficients and related metrics", "Yardstick metrics", "Ji, L. and Gallo, K. 2006. An Agreement Coefficient for Image Comparison. Photogrammetric Engineering & Remote Sensing 72(7), pp 823–833, doi: 10.14358/PERS.72.7.823.", - "ww_willmott_d(), ww_willmott_dr(), ww_systematic_mse(), ww_systematic_rmse() (plus unsystematic and _vec variants)", "Calculate Willmott's d and related metrics", "Yardstick metrics", "Willmott, C. J. 1981. On the Validation of Models. Physical Geography 2(2), pp 184-194, doi: 10.1080/02723646.1981.10642213; Willmott, C. J. 1982. Some Comments on the Evaluation of Model Performance. Bulletin of the American Meteorological Society 63(11), pp 1309-1313, doi: 10.1175/1520-0477(1982)063<1309:SCOTEO>2.0.CO;2.; Willmott, C. J., Robeson, S. M., and Matsuura, K. A refined index of model performance. International Journal of Climatology 32, pp 2088-2094, doi: 10.1002/joc.2419.", +```{r} +worldclim_testing <- cbind( + worldclim_testing, + predict(worldclim_aoa, worldclim_testing) ) -kableExtra::kable_styling( - kableExtra::kbl(funs), - bootstrap_options = c("striped", "hover") -) +head(worldclim_testing) +``` + +The predict function returns the "distance index", or "di", for each +observation: a score of how far away the observation is, in predictor space, +from your training data. Points with a "di" higher than a set threshold are +"outside" the area of applicability. We can visualize our test set here to see +that our model often, but not always, performs worse on observations with a +higher "di": + +```{r} +library(ggplot2) + +ggplot(worldclim_testing, aes(di, abs(response - predictions), color = aoa)) + + geom_point(alpha = 0.6) ```