-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* basic file structure; R and macos gitignore; code and docs for feat_scale_norm.R * basic rmarkdown readme; r cmd check github action * assessments.R * build out example in readme * simplify readme table construction; add comments * kernel_weights.R * sl_screeners.R; import methods * sl_wrappers.R * learners.R; predict.R; updates * export function * to save on memory, don't retain sl lib fits * readme with fully worked example * tweak readme details * update r cmd check gha to only stop for errors * regen readme with taller plots * try to import a specific version of a pkg archived from cran * try installing from cran github instead * try installing lme4 dependencies manually * forgot to sudo * install lme4 via apt * try to force a newer version of lme4
- Loading branch information
1 parent
f8358fe
commit 7ae8e36
Showing
49 changed files
with
3,213 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
^\.github$ | ||
^README.Rmd |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
*.html |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples | ||
on: | ||
push: | ||
branches: [main] | ||
pull_request: | ||
branches: [main] | ||
|
||
name: R-CMD-check | ||
|
||
jobs: | ||
R-CMD-check: | ||
runs-on: ${{ matrix.config.os }} | ||
|
||
name: ${{ matrix.config.os }} (${{ matrix.config.r }}) | ||
|
||
strategy: | ||
fail-fast: false | ||
matrix: | ||
config: | ||
- {os: windows-latest, r: 'release'} | ||
- {os: ubuntu-latest, r: 'release'} | ||
|
||
env: | ||
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} | ||
R_KEEP_PKG_SOURCE: yes | ||
|
||
steps: | ||
- uses: actions/checkout@v3 | ||
|
||
- uses: r-lib/actions/setup-pandoc@v2 | ||
|
||
- uses: r-lib/actions/setup-r@v2 | ||
with: | ||
r-version: ${{ matrix.config.r }} | ||
http-user-agent: ${{ matrix.config.http-user-agent }} | ||
use-public-rspm: true | ||
|
||
- uses: r-lib/actions/setup-r-dependencies@v2 | ||
with: | ||
extra-packages: any::rcmdcheck | ||
needs: check | ||
upgrade: 'TRUE' | ||
|
||
- uses: r-lib/actions/check-r-package@v2 | ||
with: | ||
upload-snapshots: true | ||
error-on: '"error"' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
Package: yall | ||
Title: Yet Another Local Learner (YALL) | ||
Version: 0.1.0 | ||
Date: 2023-11-06 | ||
Authors@R: | ||
person(given = "Sara", | ||
family = "Moore", | ||
role = c("aut", "cre"), | ||
email = "sara@saraemoore.com", | ||
comment = c(ORCID = "0000-0001-7996-8003")) | ||
Description: A localized machine learning algorithm inspired by and built on Cross-Validated SuperLearner. | ||
Imports: | ||
methods, | ||
dplyr, | ||
magrittr, | ||
purrr, | ||
tidyr, | ||
tibble, | ||
broom, | ||
kedd, | ||
scales, | ||
SuperLearner, | ||
SuperSelector, | ||
SLScreenExtra, | ||
origami, | ||
cvAUC, | ||
ROCR, | ||
yardstick, | ||
future.apply, | ||
strip | ||
Remotes: | ||
saraemoore/SuperSelector, | ||
saraemoore/SLScreenExtra, | ||
cran/kedd | ||
License: GPL-3 | ||
Encoding: UTF-8 | ||
Roxygen: list(markdown = TRUE) | ||
RoxygenNote: 7.2.3 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
# Generated by roxygen2: do not edit by hand | ||
|
||
export(GridLearner) | ||
export(SL.glm.mean) | ||
export(assess_predictions) | ||
export(distKernelWeights) | ||
export(predictAllYALL) | ||
export(screen.corRank2.wgtd) | ||
export(screen.corRank3.wgtd) | ||
export(screen.corRank4.wgtd) | ||
export(screen.corRank5.wgtd) | ||
export(screen.corRank6.wgtd) | ||
export(summarizeYALL) | ||
export(trainYALL) | ||
export(trainYALLfold) | ||
import(SLScreenExtra) | ||
import(SuperLearner) | ||
importFrom(ROCR,performance) | ||
importFrom(ROCR,prediction) | ||
importFrom(SLScreenExtra,screen.wgtd.corRank) | ||
importFrom(SuperSelector,cvSLFeatureSelector) | ||
importFrom(SuperSelector,factor_to_indicator) | ||
importFrom(SuperSelector,groupBySelectionSet) | ||
importFrom(SuperSelector,makeLibArgNumeric) | ||
importFrom(broom,tidy) | ||
importFrom(cvAUC,ci.cvAUC) | ||
importFrom(dplyr,arrange) | ||
importFrom(dplyr,bind_cols) | ||
importFrom(dplyr,bind_rows) | ||
importFrom(dplyr,distinct) | ||
importFrom(dplyr,do) | ||
importFrom(dplyr,full_join) | ||
importFrom(dplyr,group_by) | ||
importFrom(dplyr,mutate) | ||
importFrom(dplyr,mutate_all) | ||
importFrom(dplyr,n) | ||
importFrom(dplyr,one_of) | ||
importFrom(dplyr,select) | ||
importFrom(dplyr,slice) | ||
importFrom(dplyr,summarize) | ||
importFrom(dplyr,transmute) | ||
importFrom(dplyr,ungroup) | ||
importFrom(future.apply,future_lapply) | ||
importFrom(kedd,kernel.fun) | ||
importFrom(magrittr,`%>%`) | ||
importFrom(methods,as) | ||
importFrom(origami,training) | ||
importFrom(origami,validation) | ||
importFrom(purrr,map) | ||
importFrom(purrr,map2) | ||
importFrom(scales,percent) | ||
importFrom(stats,gaussian) | ||
importFrom(stats,glm) | ||
importFrom(stats,predict) | ||
importFrom(stats,setNames) | ||
importFrom(strip,strip) | ||
importFrom(tibble,as_tibble) | ||
importFrom(tibble,tibble) | ||
importFrom(tidyr,unnest) | ||
importFrom(yardstick,pr_auc_vec) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# yall 0.1.0 | ||
|
||
Initial version. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
#' Cross-validated AUC estimate with confidence interval as a formatted string | ||
#' | ||
#' @param yhat A vector of predicted outcome values | ||
#' @param y A vector of observed outcome values | ||
#' @param folds Vector of fold IDs. If \code{NULL}, the standard error will be | ||
#' estimated as though there were a single fold (no cross-validation). | ||
#' @param confidence Confidence level; defaults to \code{0.95} | ||
#' @return A character string | ||
#' @importFrom cvAUC ci.cvAUC | ||
pretty_cvauc_ci <- function(yhat, y, folds = NULL, confidence = 0.95) { | ||
res_cvauc <- cvAUC::ci.cvAUC(predictions = yhat, labels = y, | ||
folds = folds, confidence = confidence) | ||
sprintf("AUC = %.3f (%.3f, %.3f)", res_cvauc$cvAUC, res_cvauc$ci[1], res_cvauc$ci[2]) | ||
} | ||
|
||
#' Prepare data.frames for multiple model assessment visualizations | ||
#' | ||
#' @param yhat A vector of predicted outcome values | ||
#' @param y A vector of observed outcome values | ||
#' @param yhat_label Optional label to describe the model output. Useful when | ||
#' multiple models will be compared in a single visualization. Defaults to | ||
#' "Predictions". | ||
#' @return A named list containing 4 \code{data.frame}s: "res" (containing | ||
#' predicted and observed outcome values), "roc" (ROC curve), "pr" | ||
#' (precision-recall curve), and "acc" (accuracy curve). | ||
#' @importFrom yardstick pr_auc_vec | ||
#' @importFrom ROCR prediction performance | ||
#' @export | ||
assess_predictions <- function(yhat, y, yhat_label = "Predictions") { | ||
auc_res <- pretty_cvauc_ci(yhat, y) | ||
|
||
aupr_res <- yardstick::pr_auc_vec(factor(y, levels = c(1, 0)), yhat, event_level = "first") | ||
|
||
roc_pred <- ROCR::prediction(yhat, y) | ||
roc_res <- ROCR::performance(roc_pred, measure = "tpr", x.measure = "fpr") | ||
prec_res <- ROCR::performance(roc_pred, measure = "prec", x.measure = "rec") | ||
acc_res <- ROCR::performance(roc_pred, measure = "acc") | ||
|
||
roc_auc_df <- data.frame(fpr = roc_res@x.values[[1]], tpr=roc_res@y.values[[1]], | ||
auc.details = auc_res) | ||
prec_rec_df <- data.frame(rec = prec_res@x.values[[1]], prec = prec_res@y.values[[1]], | ||
auc.details = aupr_res) | ||
acc_df <- data.frame(cutoff = acc_res@x.values[[1]], acc = acc_res@y.values[[1]]) | ||
|
||
roc_auc_df$method = paste(yhat_label, auc_res, sep = "\n") | ||
prec_rec_df$method = paste(yhat_label, paste("AUCPR =", round(aupr_res, 3)), sep = "\n") | ||
|
||
return(list(res = data.frame(y = y, yhat = yhat), roc = roc_auc_df, pr = prec_rec_df, acc = acc_df)) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
#' Calculate the magnitude of a vector | ||
#' | ||
#' @param x a numeric vector | ||
#' @param f a character string representing one of the following functions: | ||
#' * \code{L1}: Manhattan aka 1-norm or L1-norm (default) | ||
#' * \code{L2}: Euclidean aka 2-norm or L2-norm | ||
#' * \code{L3}: Minkowski aka 3-norm or L3-norm | ||
#' * \code{L4}: Minkowski aka 4-norm or L4-norm | ||
#' * \code{L5}: Minkowski aka 5-norm or L5-norm | ||
#' * \code{L6}: Minkowski aka 6-norm or L6-norm | ||
#' * \code{max}: Chebyshev aka infinity norm | ||
#' @return a numeric value | ||
vectorNorm = function(x, f = c("L1", "L2", "L3", "L4", "L5", "L6", "max")) { | ||
f = match.arg(f) | ||
switch(as.character(f), | ||
L1 = sum(abs(x)), | ||
L2 = sqrt(sum(x^2)), | ||
L3 = sum(abs(x)^3)^(1/3), | ||
L4 = sum(x^4)^(1/4), | ||
L5 = sum(abs(x)^5)^(1/5), | ||
L6 = sum(x^6)^(1/6), | ||
max = max(abs(x)) | ||
# TODO: add Mahalanobis distance? might not work | ||
) | ||
} | ||
|
||
#' Calculate the magnitude of row-vectors | ||
#' | ||
#' @param x A \code{data.frame} or \code{matrix} | ||
#' @param f a character string representing one of the following functions: | ||
#' * \code{L1}: Manhattan aka 1-norm or L1-norm (default) | ||
#' * \code{L2}: Euclidean aka 2-norm or L2-norm | ||
#' * \code{L3}: Minkowski aka 3-norm or L3-norm | ||
#' * \code{L4}: Minkowski aka 4-norm or L4-norm | ||
#' * \code{L5}: Minkowski aka 5-norm or L5-norm | ||
#' * \code{L6}: Minkowski aka 6-norm or L6-norm | ||
#' * \code{max}: Chebyshev aka infinity norm | ||
#' @return a numeric vector | ||
rowNorms = function(x, f = c("L1", "L2", "L3", "L4", "L5", "L6", "max")) { | ||
f = match.arg(f) | ||
switch(as.character(f), | ||
L1 = rowSums(abs(x)), | ||
L2 = sqrt(rowSums(x^2)), | ||
L3 = rowSums(abs(x)^3)^(1/3), | ||
L4 = rowSums(x^4)^(1/4), | ||
L5 = rowSums(abs(x)^5)^(1/5), | ||
L6 = rowSums(x^6)^(1/6), | ||
max = apply(abs(x), 1, max) | ||
# TODO: add Mahalanobis distance? might not work | ||
) | ||
# canberra aka weighted L_1 distance: not useful here b/c centroid is 0 for all features | ||
# One-minus-Pearson-correlation: also not useful here b/c centroid is 0 for all features | ||
} | ||
|
||
#' Normalize a dataset to a single observation | ||
#' | ||
#' Normalize a dataset \code{X} to a single observation \code{obs} by | ||
#' subtracting the observation's values and dividing by the L1 norm. | ||
#' | ||
#' @param obs A \code{data.frame} containing a single record/row (typically a | ||
#' validation/test set observation) to which \code{X} will be normalized. | ||
#' @param X A \code{data.frame} (typically containing the training set) to be | ||
#' normalized. | ||
#' @param keepX Character vector containing names of retained features. | ||
#' @return A \code{data.frame} | ||
#' @importFrom dplyr bind_rows select one_of mutate_all slice n | ||
#' @importFrom magrittr `%>%` | ||
normalizeFeatures <- function(obs, X, keepX = colnames(X)) { | ||
bind_rows(obs, X) %>% | ||
# only keep columns of X in keepX | ||
select(one_of(keepX)) %>% | ||
# center each column by its value in obs | ||
mutate_all(list(~ . - .[1])) %>% | ||
# scale each column by its L1 norm. | ||
# could instead do norm(as.matrix(.), "1"), but sum(abs(.)) is faster. | ||
# when a column is completely homogeneous, this step produces all NaNs: | ||
mutate_all(list(~ ./vectorNorm(.))) %>% | ||
# remove obs that was prepended | ||
slice(2:n()) %>% | ||
# tibbles are trouble | ||
as.data.frame() | ||
} | ||
|
||
#' Estimate the record-level distance from \code{validX} to \code{trainX} | ||
#' | ||
#' @param normF a character string representing one of the following functions: | ||
#' * \code{L1}: Manhattan aka 1-norm or L1-norm (default) | ||
#' * \code{L2}: Euclidean aka 2-norm or L2-norm | ||
#' * \code{L3}: Minkowski aka 3-norm or L3-norm | ||
#' * \code{L4}: Minkowski aka 4-norm or L4-norm | ||
#' * \code{L5}: Minkowski aka 5-norm or L5-norm | ||
#' * \code{L6}: Minkowski aka 6-norm or L6-norm | ||
#' * \code{max}: Chebyshev aka infinity norm | ||
#' @param validX A \code{matrix}, \code{data.frame}, or numeric vector | ||
#' containing one or more observations to which \code{trainX} should be | ||
#' normalized (typically a validation/test set). | ||
#' @param trainX A \code{data.frame} (typically containing the training set) to | ||
#' be normalized. | ||
#' @param keepX Character vector containing names of retained features. | ||
#' @param verbose Currently unused. A boolean indicating whether diagnostic | ||
#' messages should be printed. Defaults to \code{FALSE}. | ||
#' @return A \code{matrix} | ||
distByRows <- function(normF = c("L1", "L2", "L3", "L4", "L5", "L6", "max"), | ||
validX, trainX, keepX, verbose = FALSE) { | ||
normF = match.arg(normF) | ||
|
||
if(is.vector(validX)|is.matrix(validX)) { | ||
if(is.vector(validX)) { | ||
validX = as.data.frame(t(validX)) | ||
} | ||
colnames(validX) = colnames(trainX) | ||
} | ||
|
||
# for every validation set observation: | ||
newTrainX = apply(validX, 1, normalizeFeatures, trainX, keepX) | ||
# now we have a list with length = number of rows in validX | ||
# each element is a new trainX data.frame tailored to that validation set observation | ||
|
||
# sapply(norms, function(d) t(sapply(newTrainX, rowNorms, d)), simplify = FALSE) | ||
res = sapply(newTrainX, rowNorms, normF) | ||
# TODO: parallelize | ||
return(t(res)) | ||
} |
Oops, something went wrong.