Skip to content

Commit 920bc21

Browse files
committed
get_SDA_property: generalization for CTEs for multiple properties per query
1 parent 89fc835 commit 920bc21

File tree

2 files changed

+218
-6
lines changed

2 files changed

+218
-6
lines changed

R/get_SDA_property.R

+205-6
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,8 @@ get_SDA_property <-
143143
bottom_depth = bottom_depth,
144144
include_minors = include_minors,
145145
miscellaneous_areas = miscellaneous_areas,
146-
FUN = FUN)
146+
FUN = FUN,
147+
sqlite_dialect = !is.null(dsn))
147148

148149
if (query_string) return(q)
149150

@@ -263,8 +264,9 @@ get_SDA_property <-
263264
areasymbols = NULL, mukeys = NULL, WHERE = NULL,
264265
top_depth = 0, bottom_depth = 200, FUN = NULL,
265266
include_minors = FALSE,
266-
miscellaneous_areas = FALSE) {
267-
# SQL by Jason Nemecek
267+
miscellaneous_areas = FALSE,
268+
sqlite_dialect = FALSE) {
269+
# SQL by Jason Nemecek; CTE style and multi-property support by Andrew Brown
268270

269271
if (is.null(mukeys) && is.null(areasymbols) && is.null(WHERE)) {
270272
stop("Please specify one of the following arguments: mukeys, areasymbols, WHERE", call. = FALSE)
@@ -365,7 +367,204 @@ get_SDA_property <-
365367
ifelse(miscellaneous_areas, ""," AND component.compkind != 'Miscellaneous area'"),
366368
top_depth, bottom_depth, top_depth, bottom_depth, property)
367369
}
368-
370+
371+
372+
# Build a CTE-style SQL query that computes depth-weighted,
# component-percentage-weighted averages of one or more horizon properties
# per map unit.
#
# property:            character vector of chorizon column names; each name is
#                      spliced into the SQL once per property-specific
#                      expression (weights, rated percentages, averages)
# top_depth:           numeric; horizon top depths shallower than this are
#                      clipped to it (hzdept_r_ADJ)
# bottom_depth:        numeric; horizon bottom depths deeper than this are
#                      clipped to it (hzdepb_r_ADJ)
# WHERE:               SQL condition text, inserted verbatim into the
#                      legend/mapunit joins and the WHERE clause of `main`
# dominant:            if TRUE, restrict `comp_temp` to the single component
#                      per map unit with the highest comppct_r (ties broken by
#                      cokey)
# include_minors:      if FALSE, only components with majcompflag = 'Yes'
# miscellaneous_areas: if FALSE, components with compkind
#                      'Miscellaneous area' are excluded
# sqlite_dialect:      if TRUE, the dominant-component subquery uses `LIMIT 1`
#                      instead of `TOP 1`
#
# Returns the query text produced by `.gluelite()` from the template below.
# Errors if `property` is empty, `WHERE` is missing, or either depth is not
# numeric.
.property_weighted_average_CTE <- function(property,
                                           top_depth,
                                           bottom_depth,
                                           WHERE,
                                           dominant = FALSE,
                                           include_minors = FALSE,
                                           miscellaneous_areas = FALSE,
                                           sqlite_dialect = FALSE) {

  stopifnot(length(property) > 0)

  if (missing(WHERE)) {
    stop("WHERE clause must be specified", call. = FALSE)
  }

  # both depths are spliced into the SQL text, so reject the call if EITHER
  # one is not numeric
  if (!is.numeric(top_depth) || !is.numeric(bottom_depth)) {
    stop("Top and bottom depth must be numeric", call. = FALSE)
  }

  MINORS <- if (include_minors) "" else "AND component.majcompflag = 'Yes'"

  MISCAREAS <- if (miscellaneous_areas) "" else "AND component.compkind != 'Miscellaneous area'"

  # correlated subquery selecting the highest-percentage component of the
  # current map unit; row limiting syntax depends on the SQL dialect
  DOMINANT <- if (dominant) {
    paste0(
      "AND component.cokey = (SELECT ", if (!sqlite_dialect) "TOP 1 " else " ", "c2.cokey FROM component AS c2
        INNER JOIN mapunit AS mm1 ON c2.mukey = mm1.mukey AND c2.mukey = mapunit.mukey ",
      gsub("component", "c2", MISCAREAS),
      # leading space keeps ORDER BY separate from the quoted literal that
      # ends the miscellaneous-area filter
      " ORDER BY c2.comppct_r DESC, c2.cokey ", if (sqlite_dialect) "LIMIT 1" else "", ")")
  } else {
    ""
  }

  # plain comma-separated column list
  PROPERTY <- paste0(property, collapse = ", ")

  # the fragments below contain {property}; .gluelite() yields one string per
  # property, which are then collapsed into a single SQL column list

  # per-horizon thickness weight (0 when the property is NULL) and the
  # per-component sum of those weights
  PROPHZWTS <- paste0(.gluelite("CASE
      WHEN main.{property} IS NULL THEN 0
      ELSE (main.hzdepb_r_ADJ - main.hzdept_r_ADJ)
    END AS thickness_wt_{property},
    SUM(CASE
      WHEN main.{property} IS NULL THEN 0
      ELSE (main.hzdepb_r_ADJ - main.hzdept_r_ADJ)
    END) OVER (PARTITION BY main.cokey) AS sum_thickness_{property},
    main.{property}"), collapse = ",\n")

  # component percentage as a fraction of the map unit total
  MUPROPWTS <- paste0(.gluelite("CASE
      WHEN comppct_r = SUM_COMP_PCT THEN 1
      ELSE CAST(comppct_r AS REAL) / SUM_COMP_PCT
    END AS WEIGHTED_COMP_PCT_{property}"), collapse = ",\n")

  # components with no rated thickness for a property get zero weight
  COMPWTS <- paste0(.gluelite("CASE
      WHEN sum_thickness_{property} = 0 THEN 0
      ELSE comp_temp3.WEIGHTED_COMP_PCT_{property}
    END AS CORRECT_COMP_PCT_{property}"), collapse = ",\n")

  RATEDWTS <- paste0(.gluelite("SUM(CORRECT_COMP_PCT_{property}) AS RATED_PCT_{property}"),
                     collapse = ",\n")

  COMPRATEDWTS <- paste0(.gluelite("RATED_PCT_{property}"), collapse = ", ")

  COMPWTDAVG <- paste0(.gluelite("SUM(
      CAST(weights.CORRECT_COMP_PCT_{property} AS REAL)
      * comp_temp2.thickness_wt_{property} / NULLIF(comp_temp2.sum_thickness_{property}, 0)
      * comp_temp2.{property}
    ) AS COMP_WEIGHTED_AVERAGE_{property}"), collapse = ",\n")

  COMPRATEDAVG <- paste0(.gluelite("COMP_WEIGHTED_AVERAGE_{property}"), collapse = ", ")

  # map unit level result; the final column is named after the property itself
  MUWTDAVG <- paste0(.gluelite("CAST (SUM(
      (CASE
        WHEN last_step.RATED_PCT_{property} = 0 THEN 0
        ELSE last_step.COMP_WEIGHTED_AVERAGE_{property}
      END) / NULLIF(last_step.RATED_PCT_{property}, 0))
    OVER (PARTITION BY kitchensink.mukey) AS REAL) AS {property}"), collapse = ",\n")

  # every placeholder below refers to a local variable or argument of this
  # function, which .gluelite() looks up in the calling frame
  q <- .gluelite("
    WITH kitchensink AS (
      SELECT mukey, areasymbol, musym, muname
      FROM legend
      INNER JOIN mapunit ON mapunit.lkey = legend.lkey AND {WHERE}
    ),
    comp_temp AS (
      SELECT
        mapunit.mukey,
        component.cokey,
        component.comppct_r,
        component.compkind,
        component.majcompflag,
        SUM(component.comppct_r) OVER (PARTITION BY mapunit.mukey) AS SUM_COMP_PCT
      FROM legend
      INNER JOIN mapunit ON mapunit.lkey = legend.lkey AND {WHERE}
      INNER JOIN component ON component.mukey = mapunit.mukey {MISCAREAS} {MINORS} {DOMINANT}
    ),
    comp_temp3 AS (
      SELECT cokey, compkind, majcompflag, SUM_COMP_PCT,
      {MUPROPWTS}
      FROM comp_temp
    ),
    main AS (
      SELECT
        mapunit.mukey,
        legend.areasymbol,
        mapunit.musym,
        mapunit.muname,
        component.cokey,
        chorizon.chkey,
        component.compname,
        component.compkind,
        chorizon.hzname,
        chorizon.hzdept_r,
        chorizon.hzdepb_r,
        CASE WHEN chorizon.hzdept_r < {top_depth} THEN {top_depth} ELSE chorizon.hzdept_r END AS hzdept_r_ADJ,
        CASE WHEN chorizon.hzdepb_r > {bottom_depth} THEN {bottom_depth} ELSE chorizon.hzdepb_r END AS hzdepb_r_ADJ,
        component.comppct_r,
        {PROPERTY}
      FROM legend
      INNER JOIN mapunit
        ON mapunit.lkey = legend.lkey
      INNER JOIN component
        ON component.mukey = mapunit.mukey {MISCAREAS} {MINORS}
      INNER JOIN chorizon
        ON chorizon.cokey = component.cokey
        AND chorizon.hzdepb_r > {top_depth}
        AND chorizon.hzdept_r <= {bottom_depth}
      WHERE {WHERE}
    ),
    comp_temp2 AS (
      SELECT
        main.mukey,
        main.areasymbol,
        main.musym,
        main.muname,
        main.cokey,
        main.chkey,
        main.compname,
        main.compkind,
        main.hzname,
        main.hzdept_r,
        main.hzdepb_r,
        main.hzdept_r_ADJ,
        main.hzdepb_r_ADJ,
        main.comppct_r,
        {PROPHZWTS}
      FROM main
    ),
    weights AS (
      SELECT DISTINCT
        comp_temp2.mukey,
        comp_temp2.cokey,
        {COMPWTS}
      FROM comp_temp2
      LEFT JOIN comp_temp3 ON comp_temp3.cokey = comp_temp2.cokey
    ),
    weights2 AS (
      SELECT
        mukey,
        {RATEDWTS}
      FROM weights
      GROUP BY mukey
    ),
    last_step AS (
      SELECT
        comp_temp2.mukey,
        comp_temp2.cokey,
        {COMPRATEDWTS},
        {COMPWTDAVG}
      FROM comp_temp2
      LEFT JOIN weights ON weights.cokey = comp_temp2.cokey
      LEFT JOIN weights2 ON weights2.mukey = comp_temp2.mukey
      GROUP BY comp_temp2.mukey, comp_temp2.cokey, {COMPRATEDWTS}
    ),
    last_step2 AS (
      SELECT
        kitchensink.mukey,
        last_step.cokey,
        kitchensink.areasymbol,
        kitchensink.musym,
        kitchensink.muname,
        {COMPRATEDWTS},
        {MUWTDAVG}
      FROM last_step
      RIGHT JOIN kitchensink ON kitchensink.mukey = last_step.mukey
      GROUP BY kitchensink.areasymbol, kitchensink.musym, kitchensink.muname, kitchensink.mukey, {COMPRATEDWTS}, {COMPRATEDAVG}, last_step.cokey
    )
    SELECT
      last_step2.mukey,
      last_step2.areasymbol,
      last_step2.musym,
      last_step2.muname,
      {PROPERTY}
    FROM last_step2
    LEFT JOIN last_step
      ON last_step.mukey = last_step2.mukey
    GROUP BY last_step2.areasymbol, last_step2.musym, last_step2.muname, last_step2.mukey, {PROPERTY}
    ORDER BY last_step2.mukey, last_step2.areasymbol, last_step2.musym, last_step2.muname, {PROPERTY};
  ")
  q
}
567+
369568
.property_weighted_average <- function(property,
370569
top_depth,
371570
bottom_depth,
@@ -500,8 +699,8 @@ get_SDA_property <-
500699
WHERE,
501700
ifelse(miscellaneous_areas, ""," AND c1.compkind != 'Miscellaneous area'")),
502701

503-
# weighted average (.weighted_average handles vector agg_property)
504-
"WEIGHTED AVERAGE" = .property_weighted_average(agg_property, top_depth, bottom_depth, WHERE, include_minors = include_minors, miscellaneous_areas = miscellaneous_areas),
702+
# weighted average (weighted_average handles vector agg_property)
703+
"WEIGHTED AVERAGE" = .property_weighted_average_CTE(agg_property, top_depth, bottom_depth, WHERE, include_minors = include_minors, miscellaneous_areas = miscellaneous_areas, dominant = FALSE, sqlite_dialect = sqlite_dialect),
505704
"MIN/MAX" =
506705
sprintf("SELECT mapunit.mukey, areasymbol, musym, muname, %s
507706
INTO #funagg

R/utils.R

+13
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,19 @@
22
## misc functions used by soilDB
33
##
44

5+
## simplified base R implementation of glue::glue()
# x: character vector with simple braced expressions to replace (NOT evaluate)
# env: environment where expression values are defined (default: `parent.frame()`)
#
# Each `{name}` in an element of `x` is replaced by the value of the variable
# `name`, looked up in `env` (and its enclosing environments). When a variable
# holds several unique values, one output string is produced per unique
# combination of values, so the result can be longer than `x`. Elements
# without any `{name}` placeholder are returned unchanged. Returns an unnamed
# character vector; errors (from `get()`) if a referenced variable is not found.
.gluelite <- function(x, env = parent.frame()) {
  # resolve the default before entering the nested closures below
  force(env)
  as.character(unlist(lapply(x, function(y) {
    # all placeholder occurrences, in order of appearance (with duplicates)
    vars <- regmatches(y, gregexpr("\\{[^{}]+\\}", y))[[1]]
    # nothing to substitute: without this guard the empty value grid below
    # has zero rows and the string would be dropped from the result
    if (length(vars) == 0L) {
      return(y)
    }
    uvars <- unique(vars)
    vals <- lapply(uvars, function(var) unique(get(gsub("[{}]", "", var), env, inherits = TRUE)))
    # one row of the grid per combination of values; within a row, each
    # occurrence in `vars` replaces the first remaining match of its placeholder
    unique(apply(expand.grid(vals, stringsAsFactors = FALSE), 1, function(z)
      Reduce(function(y, var) sub(var, z[match(var, uvars)], y, fixed = TRUE), vars, y)))
  })))
}
17+
518
# convert diagnostic horizon info into wide-formatted, boolean table
619
.diagHzLongtoWide <- function(d, feature = 'featkind', id = 'peiid') {
720

0 commit comments

Comments
 (0)