Skip to content

Commit

Permalink
Convert NASIS-related queries from RODBC->DBI #146
Browse files Browse the repository at this point in the history
  • Loading branch information
brownag committed Dec 3, 2020
1 parent 0f12e74 commit f445e89
Show file tree
Hide file tree
Showing 26 changed files with 699 additions and 680 deletions.
8 changes: 4 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: soilDB
Type: Package
Title: Soil Database Interface
Version: 2.5.9
Date: 2020-11-30
Version: 2.6.0
Date: 2020-12-02
Authors@R: c(person(given="Dylan", family="Beaudette", role = c("aut"), email = "dylan.beaudette@usda.gov"),
person(given="Jay", family="Skovlin", role = c("aut")),
person(given="Stephen", family="Roecker", role = c("aut")),
Expand All @@ -14,8 +14,8 @@ License: GPL (>= 3)
LazyLoad: yes
Depends: R (>= 3.5.0)
Imports: aqp, grDevices, graphics, stats, utils, plyr, xml2, sp, reshape2,
raster, curl, lattice, methods, data.table
Suggests: rgdal, jsonlite, RODBC, httr, sf, rgeos, rvest,
raster, curl, lattice, methods, data.table, DBI, odbc, RSQLite
Suggests: rgdal, jsonlite, RODBC, httr, sf, rgeos, rvest,
testthat, stringr, latticeExtra,
RCurl, XML, ggplot2, gridExtra, viridis, mapview, rasterVis
Repository: CRAN
Expand Down
11 changes: 11 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,17 @@ importFrom(data.table,
data.table,
as.data.table)

importFrom(DBI,
dbGetQuery,
dbConnect,
dbSendQuery,
dbFetch
)

importFrom(odbc, odbc)

importFrom(RSQLite, SQLite)

importFrom(reshape2,
dcast,
melt
Expand Down
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# soilDB 2.6.0 (2020-12-02)
* Connections to the local NASIS database now use `DBI` and `odbc` instead of `RODBC`. Two new functions, `dbConnectNASIS` and `dbQueryNASIS`, handle connecting with read-only credentials, submitting queries and fetching results, and closing the DBI connection upon completion.

# soilDB 2.5.9 (2020-11-30)
* new function `ROSETTA` for accessing the new ROSETTA model API (c/o Todd Skaggs, USDA-ARS)
* `fetchOSD(..., extended=TRUE)` gains geographically associated soils, parsed from OSD (thanks AGB)
Expand Down
45 changes: 45 additions & 0 deletions R/dbQueryNASIS.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#' Send queries to a NASIS DBIConnection
#'
#' Runs \code{q} against \code{conn} with \code{DBI::dbGetQuery} and returns
#' the result wrapped in \code{data.frame()}. When \code{close} is \code{TRUE}
#' the connection is disconnected as the function exits, including when the
#' query itself fails, so a failed query does not leave the connection open.
#'
#' @param conn A \code{DBIConnection} object, as returned by \code{DBI::dbConnect()}.
#' @param q A statement to execute using \code{DBI::dbGetQuery}
#' @param close Close connection after query? Default: \code{TRUE}
#' @param ... Additional arguments to \code{DBI::dbGetQuery}
#'
#' @return Result of \code{DBI::dbGetQuery}, passed through \code{data.frame()}.
#'   An error is raised if \code{conn} inherits from \code{"try-error"}.
#' @export
#'
#' @importFrom DBI dbGetQuery dbDisconnect
dbQueryNASIS <- function(conn, q, close = TRUE, ...) {

  # callers may hand over a 'try-error' object in place of a connection;
  # there is nothing to query (or disconnect) in that case
  if (inherits(conn, 'try-error'))
    stop("Failed to connect to NASIS database!", call. = FALSE)

  # register the disconnect up front so that it also runs when the query
  # errors; as.logical() keeps truthy values such as 1 working as before
  if (isTRUE(as.logical(close)))
    on.exit(DBI::dbDisconnect(conn), add = TRUE)

  ## exec query
  d <- DBI::dbGetQuery(conn, q, ...)

  # last expression is the return value; on.exit() handler fires afterwards
  data.frame(d)
}

#' Open a connection to the local NASIS database
#'
#' Thin wrapper that delegates to the internal \code{.openNASISchannel()}
#' helper and hands back whatever that helper returns.
#'
#' @return A \code{DBIConnection} object, as returned by \code{DBI::dbConnect()}.
#'
#' @export
#'
dbConnectNASIS <- function() {
  # TODO: NASIS sqlite snapshot connection via DBI/RSQLite

  # the default connection goes through DBI/odbc (historically RODBC)
  nasis_connection <- .openNASISchannel()
  nasis_connection
}
90 changes: 44 additions & 46 deletions R/fetchNASIS_pedons.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,54 +5,54 @@
soilColorState='moist', lab=FALSE, stringsAsFactors = default.stringsAsFactors()) {

# test connection
if(! 'nasis_local' %in% names(RODBC::odbcDataSources()))
if (!'nasis_local' %in% names(RODBC::odbcDataSources()))
stop('Local NASIS ODBC connection has not been setup. Please see `http://ncss-tech.github.io/AQP/soilDB/setup_local_nasis.html`.')

# sanity check
if(! soilColorState %in% c('dry', 'moist'))
if (!soilColorState %in% c('dry', 'moist'))
stop('soilColorState must be either `dry` or `moist`', call. = FALSE)

## load data in pieces
# these fail gracefully when no data in local DB | selected set
site_data <- get_site_data_from_NASIS_db(SS=SS, stringsAsFactors = stringsAsFactors)
hz_data <- get_hz_data_from_NASIS_db(SS=SS, stringsAsFactors = stringsAsFactors)
color_data <- get_colors_from_NASIS_db(SS=SS)
site_data <- get_site_data_from_NASIS_db(SS = SS, stringsAsFactors = stringsAsFactors)
hz_data <- get_hz_data_from_NASIS_db(SS = SS, stringsAsFactors = stringsAsFactors)
color_data <- get_colors_from_NASIS_db(SS = SS)

## ensure there are enough data to create an SPC object
if (nrow(hz_data) == 0) {
stop('No site/pedons objects in local NASIS DB or selected set.', call. = FALSE)
}

# data that cannot be effectively flattened in SQL
extended_data <- get_extended_data_from_NASIS_db(SS=SS, nullFragsAreZero=nullFragsAreZero, stringsAsFactors = stringsAsFactors)
extended_data <- get_extended_data_from_NASIS_db(SS = SS, nullFragsAreZero = nullFragsAreZero, stringsAsFactors = stringsAsFactors)

## join horizon + hz color: all horizons
h <- merge(hz_data, color_data, by='phiid', all.x=TRUE, sort=FALSE)

# check for empty fragment summary and nullFragsAreZero
if(nullFragsAreZero & all(is.na(unique(extended_data$frag_summary$phiid))))
if (nullFragsAreZero & all(is.na(unique(extended_data$frag_summary$phiid))))
extended_data$frag_summary <- cbind(phiid = unique(h$phiid), extended_data$frag_summary[,-1])

## join hz + fragment summary
h <- merge(h, extended_data$frag_summary, by='phiid', all.x=TRUE, sort=FALSE)
h <- merge(h, extended_data$frag_summary, by = 'phiid', all.x = TRUE, sort = FALSE)

# check for empty artifact summary and nullFragsAreZero
if(nullFragsAreZero & all(is.na(unique(extended_data$art_summary$phiid))))
if (nullFragsAreZero & all(is.na(unique(extended_data$art_summary$phiid))))
extended_data$art_summary <- cbind(phiid = unique(h$phiid), extended_data$art_summary[,-1])

# join hz + artifact summary
h <- merge(h, extended_data$art_summary, by='phiid', all.x=TRUE, sort=FALSE)
h <- merge(h, extended_data$art_summary, by = 'phiid', all.x = TRUE, sort = FALSE)

## fix some common problems

# replace missing lower boundaries
missing.lower.depth.idx <- which(!is.na(h$hzdept) & is.na(h$hzdepb))

# keep track of affected pedon IDs (if none, this will have zero length)
assign('missing.bottom.depths', value=unique(h$pedon_id[missing.lower.depth.idx]), envir=soilDB.env)
assign('missing.bottom.depths', value = unique(h$pedon_id[missing.lower.depth.idx]), envir = soilDB.env)

if(length(missing.lower.depth.idx) > 0) {
message(paste('replacing missing lower horizon depths with top depth + 1cm ... [', length(missing.lower.depth.idx), ' horizons]', sep=''))
if (length(missing.lower.depth.idx) > 0) {
message(paste0('replacing missing lower horizon depths with top depth + 1cm ... [', length(missing.lower.depth.idx), ' horizons]'))

# make edit
h$hzdepb[missing.lower.depth.idx] <- h$hzdept[missing.lower.depth.idx] + 1
Expand All @@ -62,10 +62,10 @@
top.eq.bottom.idx <- which(h$hzdept == h$hzdepb)

# keep track of affected pedon IDs (if none, this will have zero length)
assign('top.bottom.equal', value=unique(h$pedon_id[ top.eq.bottom.idx]), envir=soilDB.env)
assign('top.bottom.equal', value = unique(h$pedon_id[ top.eq.bottom.idx]), envir = soilDB.env)

if(length(top.eq.bottom.idx) > 0) {
message(paste('top/bottom depths equal, adding 1cm to bottom depth ... [', length(top.eq.bottom.idx), ' horizons]', sep=''))
if (length(top.eq.bottom.idx) > 0) {
message(paste0('top/bottom depths equal, adding 1cm to bottom depth ... [', length(top.eq.bottom.idx), ' horizons]'))

# make the edit
h$hzdepb[top.eq.bottom.idx] <- h$hzdepb[top.eq.bottom.idx] + 1
Expand All @@ -74,37 +74,36 @@

## copy pre-computed colors into a convenience field for plotting
# moist colors
if(soilColorState == 'moist')
if (soilColorState == 'moist')
h$soil_color <- h$moist_soil_color

# dry colors
if(soilColorState == 'dry')
if (soilColorState == 'dry')
h$soil_color <- h$dry_soil_color


## test for horizonation inconsistencies... flag, and optionally remove
# ~ 1.3 seconds / ~ 4k pedons
h.test <- do.call('rbind', lapply(split(h, h$peiid), function(d) {
res <- aqp::hzDepthTests(top=d[['hzdept']], bottom=d[['hzdepb']])
return(data.frame(peiid = d$peiid, hz_logic_pass=all(!res)))
res <- aqp::hzDepthTests(top = d[['hzdept']], bottom = d[['hzdepb']])
return(data.frame(peiid = d$peiid, hz_logic_pass = all(!res)))
}))

# which are the good (valid) ones?
good.ids <- as.character(h.test$peiid[which(h.test$hz_logic_pass)])
bad.ids <- as.character(h.test$peiid[which(!h.test$hz_logic_pass)])
bad.horizons<- h[which(!h.test$hz_logic_pass), c(1:4,6,7)]
bad.horizons <- h[which(!h.test$hz_logic_pass), c(1:4,6,7)]
bad.pedon.ids <- site_data$pedon_id[which(site_data$peiid %in% bad.ids)]

# optionally filter pedons WITH NO horizonation inconsistencies
if(rmHzErrors)
if (rmHzErrors)
h <- h[which(h$peiid %in% good.ids), ]

# keep track of those pedons with horizonation errors
assign('bad.pedon.ids', value=bad.pedon.ids, envir=soilDB.env)
assign('bad.pedon.ids', value = bad.pedon.ids, envir = soilDB.env)
assign("bad.horizons", value = data.frame(bad.horizons), envir = soilDB.env)

## optionally convert NA fragvol to 0
if(nullFragsAreZero) {
if (nullFragsAreZero) {
# this is the "total fragment volume" per NASIS calculation
h$fragvoltot <- ifelse(is.na(h$fragvoltot), 0, h$fragvoltot)

Expand Down Expand Up @@ -151,7 +150,7 @@

# load best-guess optimal records from ecositehistory
# method is added to the new field called 'es_selection_method'
ed.es <- split(extended_data$ecositehistory, extended_data$siteiid)
ed.es <- split(extended_data$ecositehistory, extended_data$ecositehistory$siteiid)
best.ecosite.data <- do.call('rbind', lapply(ed.es, .pickBestEcosite))
site(h) <- best.ecosite.data

Expand All @@ -164,11 +163,11 @@
# add surface frag summary
sfs <- extended_data$surf_frag_summary
# optionally convert NA fragvol to 0
if(nullFragsAreZero) {
if (nullFragsAreZero) {
sfs <- as.data.frame(
cbind(sfs[, 1, drop=FALSE],
cbind(sfs[, 1, drop = FALSE],
lapply(sfs[, -1], function(i) ifelse(is.na(i), 0, i))
), stringsAsFactors=FALSE)
), stringsAsFactors = FALSE)
}

# add surf. frag summary to @site
Expand All @@ -184,12 +183,12 @@

# join-in landform string
ed.lf <- split(extended_data$geomorph, extended_data$geomorph$peiid)
lf <- do.call('rbind', lapply(ed.lf, .formatLandformString, name.sep=' & '))
lf <- do.call('rbind', lapply(ed.lf, .formatLandformString, name.sep = ' & '))
site(h) <- lf

# join-in parent material strings
ed.pm <- split(extended_data$pm, extended_data$siteiid)
pm <- do.call('rbind', lapply(ed.pm, .formatParentMaterialString, name.sep=' & '))
ed.pm <- split(extended_data$pm, extended_data$pm$siteiid)
pm <- do.call('rbind', lapply(ed.pm, .formatParentMaterialString, name.sep = ' & '))
site(h) <- pm

# set metadata
Expand All @@ -198,18 +197,18 @@
metadata(h) <- m

# print any messages on possible data quality problems:
if(exists('sites.missing.pedons', envir=soilDB.env))
if(length(get('sites.missing.pedons', envir=soilDB.env)) > 0)
if (exists('sites.missing.pedons', envir = soilDB.env))
if (length(get('sites.missing.pedons', envir = soilDB.env)) > 0)
message("-> QC: sites without pedons: use `get('sites.missing.pedons', envir=soilDB.env)` for related usersiteid values")

if(exists('dup.pedon.ids', envir=soilDB.env))
if(length(get('dup.pedon.ids', envir=soilDB.env)) > 0)
if (exists('dup.pedon.ids', envir = soilDB.env))
if (length(get('dup.pedon.ids', envir = soilDB.env)) > 0)
message("-> QC: duplicate pedons: use `get('dup.pedon.ids', envir=soilDB.env)` for related peiid values")

# set NASIS-specific horizon identifier
tryCatch(hzidname(h) <- 'phiid', error = function(e) {
if(grepl(e$message, pattern="not unique$")) {
if(!rmHzErrors) {
if (grepl(e$message, pattern = "not unique$")) {
if (!rmHzErrors) {
# if rmHzErrors = FALSE, keep unique integer assigned ID to all records automatically
message("-> QC: duplicate horizons are present with rmHzErrors=FALSE! defaulting to `hzID` as unique horizon ID.")
} else {
Expand All @@ -223,24 +222,23 @@
hzdesgnname(h) <- "hzname"
hztexclname(h) <- "texture"

if(exists('bad.pedon.ids', envir=soilDB.env))
if(length(get('bad.pedon.ids', envir=soilDB.env)) > 0)
if (exists('bad.pedon.ids', envir = soilDB.env))
if (length(get('bad.pedon.ids', envir = soilDB.env)) > 0)
message("-> QC: horizon errors detected, use `get('bad.pedon.ids', envir=soilDB.env)` for related userpedonid values or `get('bad.horizons', envir=soilDB.env)` for related horizon designations")

if(exists('missing.bottom.depths', envir=soilDB.env))
if(length(get('missing.bottom.depths', envir=soilDB.env)) > 0)
if (exists('missing.bottom.depths', envir = soilDB.env))
if (length(get('missing.bottom.depths', envir = soilDB.env)) > 0)
message("-> QC: pedons missing bottom hz depths: use `get('missing.bottom.depths', envir=soilDB.env)` for related pedon IDs")

if(exists('top.bottom.equal', envir=soilDB.env))
if(length(get('top.bottom.equal', envir=soilDB.env)) > 0)
if (exists('top.bottom.equal', envir = soilDB.env))
if (length(get('top.bottom.equal', envir = soilDB.env)) > 0)
message("-> QC: equal hz top and bottom depths: use `get('top.bottom.equal', envir=soilDB.env)` for related pedon IDs")

## https://github.com/ncss-tech/soilDB/issues/44
# optionally load phlabresults table
if (lab) {
phlabresults <- .get_phlabresults_data_from_NASIS_db(SS=SS)
phlabresults <- .get_phlabresults_data_from_NASIS_db(SS = SS)
horizons(h) <- phlabresults
#h <- join(h, phlabresults, by = "phiid", type = "left")
}

# done
Expand Down
22 changes: 10 additions & 12 deletions R/get_RMF_from_NASIS_db.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
get_RMF_from_NASIS_db <- function(SS=TRUE) {
# must have RODBC installed
if(!requireNamespace('RODBC'))
if (!requireNamespace('RODBC'))
stop('please install the `RODBC` package', call.=FALSE)

# RMF
Expand All @@ -19,22 +19,20 @@ get_RMF_from_NASIS_db <- function(SS=TRUE) {
FROM phredoxfcolor_View_1
ORDER BY phrdxfiidref, colormoistst;"

channel <- .openNASISchannel()
if (channel == -1)
channel <- dbConnectNASIS()

if (inherits(channel, 'try-error'))
return(data.frame())

# toggle selected set vs. local DB
if(SS == FALSE) {
if (SS == FALSE) {
q <- gsub(pattern = '_View_1', replacement = '', x = q, fixed = TRUE)
q.c <- gsub(pattern = '_View_1', replacement = '', x = q.c, fixed = TRUE)
}

# exec queries
d <- RODBC::sqlQuery(channel, q, stringsAsFactors=FALSE)
d.c <- RODBC::sqlQuery(channel, q.c, stringsAsFactors=FALSE)

# close connection
RODBC::odbcClose(channel)
d <- dbQueryNASIS(channel, q, close = FALSE)
d.c <- dbQueryNASIS(channel, q.c)

# uncode domained columns
d <- uncode(d)
Expand All @@ -43,12 +41,12 @@ get_RMF_from_NASIS_db <- function(SS=TRUE) {
# convert back to characters / numeric
d.c$colormoistst <- as.character(d.c$colormoistst)
d.c$colorhue <- as.character(d.c$colorhue)
# careful!

# uncode creates factors, so we have to convert to character first
d.c$colorvalue <- as.numeric(as.character(d.c$colorvalue))
d.c$colorchroma <- as.numeric(as.character(d.c$colorchroma))


# done
return(list(RMF=d, RMF_colors=d.c))
return(list(RMF = d, RMF_colors = d.c))
}
Loading

0 comments on commit f445e89

Please sign in to comment.