Skip to content

Commit

Permalink
finishing up #302
Browse files Browse the repository at this point in the history
  • Loading branch information
dylanbeaudette committed Dec 5, 2023
1 parent 7eda770 commit b05a297
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 28 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Package: aqp
Version: 2.0.2
Version: 2.0.3
Title: Algorithms for Quantitative Pedology
Authors@R: c(person(given="Dylan", family="Beaudette", role = c("aut", "cre"), email = "dylan.beaudette@usda.gov"), person(given="Pierre", family="Roudier", email="roudierp@landcareresearch.co.nz", role = c("aut", "ctb")), person(given="Andrew", family="Brown", email="andrew.g.brown@usda.gov", role = c("aut", "ctb")))
Author: Dylan Beaudette [aut, cre], Pierre Roudier [aut, ctb], Andrew Brown [aut, ctb]
Expand Down
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# aqp 2.0.3 (2023-12-05)
* performance improvements in `profileInformationIndex()`

# aqp 2.0.2 (2023-11-18)
* CRAN release
* bug fix / enhancements in `evalMissingData()`
Expand Down
61 changes: 42 additions & 19 deletions R/profileInformationIndex.R
Original file line number Diff line number Diff line change
Expand Up @@ -323,31 +323,54 @@ profileInformationIndex <- function(x, vars, method = c('joint', 'individual'),
}


## TODO: this will error / drop profiles in the presence of bad horizonation
## note: this will error / drop profiles in the presence of bad horizonation

# dice() to 1cm intervals for common baseline
## dice() to 1cm intervals for common baseline
# -> 10 horizons of the same data NOT more informative than 1 horizon
# -> causes data corruption when bad hz depths present (lots of messages)
x <- dice(x, fill = padNA)

## TODO: convert to data.table
# SPC = FALSE
# data.table()
# dt[, ]
# as.vector()

# iterate over profiles
## iterate over profiles
# result is a vector suitable for site-level attribute
res <- profileApply(
x,
simplify = TRUE,
FUN = .PII_by_profile,
vars = vars,
baseline = baseline,
method = method,
numericDigits = numericDigits,
compression = compression
)

## slow / but simple to understand and test
# res <- profileApply(
# x,
# simplify = TRUE,
# FUN = .PII_by_profile,
# vars = vars,
# baseline = baseline,
# method = method,
# numericDigits = numericDigits,
# compression = compression
# )
# return(res)

## fast via data.table
# capture the name of the profile ID column and the profile IDs now,
# `x` is replaced by its horizon data below
.idn <- idname(x)
.pIDs <- profile_id(x)

# work on horizon data as a data.table, one group per profile ID
x <- data.table(horizons(x))

# result is a data.table: profile ID column (named by .idn) + V1 (PII)
res <- x[ ,
  .PII_by_profile(
    x = .SD,
    vars = vars,
    baseline = baseline,
    method = method,
    numericDigits = numericDigits,
    compression = compression
  ),
  by = .idn
]

# double check that the number and order of profiles is preserved
# note: the ID column is named after idname(x), which is not always 'id';
#       `res$id` can be NULL for other ID names, and all(NULL == .pIDs)
#       is TRUE, which would silently disable this check
stopifnot(
  length(res[[.idn]]) == length(.pIDs),
  all(res[[.idn]] == .pIDs)
)

# down-grade data.table to named numeric vector
res <- as.vector(res$V1)
names(res) <- .pIDs

# done
return(res)
Expand Down
53 changes: 45 additions & 8 deletions misc/sandbox/profile-information-index.R
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ y <- cbind(
runif(n, min = min(x), max = max(x)),
runif(n, min = min(x), max = max(x)),
runif(n, min = min(x), max = max(x))
)
)
cor(y)

.cols <- hcl.colors(n = 100, palette = 'zissou 1')
Expand Down Expand Up @@ -425,12 +425,15 @@ x <- combine(x)

vars <- c('hzname', 'hue', 'value', 'chroma', 'texture_class')


z <- data.frame(
baseline.joint = profileInformationIndex(x, vars = vars, baseline = TRUE, method = 'j', padNA = FALSE),
baseline.individual = profileInformationIndex(x, vars = vars, baseline = TRUE, method = 'i', padNA = FALSE),
joint = profileInformationIndex(x, vars = vars, baseline = FALSE, method = 'j', padNA = FALSE),
individual = profileInformationIndex(x, vars = vars, baseline = FALSE, method = 'i', padNA = FALSE)
# data.table: 44 seconds
# profileApply:
system.time(
z <- data.frame(
baseline.joint = profileInformationIndex(x, vars = vars, baseline = TRUE, method = 'j', padNA = FALSE),
baseline.individual = profileInformationIndex(x, vars = vars, baseline = TRUE, method = 'i', padNA = FALSE),
joint = profileInformationIndex(x, vars = vars, baseline = FALSE, method = 'j', padNA = FALSE),
individual = profileInformationIndex(x, vars = vars, baseline = FALSE, method = 'i', padNA = FALSE)
)
)

cor(z)
Expand Down Expand Up @@ -502,7 +505,7 @@ ggplot(site(x), aes(x = pi, y = greatgroup)) +
stat_summary(geom = 'point', fun = median, shape = 21, fill = 'black', col = 'white', cex = 3) +
scale_color_brewer() +
scale_x_continuous(n.breaks = 16) +
xlab('Profile Information Index') + ylab('') +
xlab('Profile Information Index (bytes)') + ylab('') +
labs(title = 'Profile Information Index for Select Greatgroup Taxa', color = 'Interval')


Expand Down Expand Up @@ -579,3 +582,37 @@ par(mar = c(3, 0, 0, 2))
plotSPC(z1, color = 'p1', plot.order = order(z1$pi), print.id = FALSE, width = 0.35, divide.hz = FALSE)
axis(side = 1, at = 1:length(z1), labels = format(z1$pi[order(z1$pi)], digits = 3), cex.axis = 0.66)



## benchmark: time profileInformationIndex() on a large set of simulated profiles

# simulate 10,000 profiles as SoilProfileCollection objects (SPC = TRUE),
# each element of 1:10000 is passed to random_profile() as its first argument
# NOTE(review): presumably the first argument is used as the profile ID -- confirm
z1 <- lapply(
1:10000,
random_profile,
n = 5,
exact = TRUE,
n_prop = 3,
SPC = TRUE,
method = 'LPP',
lpp.a = 5,
lpp.b = 10,
lpp.d = 5,
lpp.e = 5,
lpp.u = 25,
min_thick = 2,
max_thick = 50
)

# combine the list of simulated profiles into a single object
z1 <- combine(z1)

# NOTE(review): the two timed calls below are textually identical; the recorded
# timings presumably come from running each with a different implementation of
# profileInformationIndex() loaded (profileApply-based vs. data.table-based) -- confirm

# recorded timings, by number of simulated profiles
# 1k : 4.6 seconds
# 10k : 55 seconds
system.time(
old <- profileInformationIndex(z1, vars = c('p1', 'p2', 'p3'), method = 'j', scale = TRUE, baseline = TRUE, padNA = FALSE)
)

# recorded timings, by number of simulated profiles
# 1k : 1.56 seconds
# 10k : 18 seconds
system.time(
new <- profileInformationIndex(z1, vars = c('p1', 'p2', 'p3'), method = 'j', scale = TRUE, baseline = TRUE, padNA = FALSE)
)


0 comments on commit b05a297

Please sign in to comment.