Skip to content

Commit

Permalink
remove hashing (#119)
Browse files Browse the repository at this point in the history
related to insightsengineering/teal#751

Remove dataset hashing from `teal.data` and transfer it to `teal`.

Co-authored-by: 27856297+dependabot-preview[bot]@users.noreply.github.com <27856297+dependabot-preview[bot]@users.noreply.github.com>
  • Loading branch information
mhallal1 and dependabot-preview[bot] authored Nov 11, 2022
1 parent 7796546 commit 71bf77c
Show file tree
Hide file tree
Showing 7 changed files with 25 additions and 65 deletions.
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

* Examples now use `scda.2022` instead of `scda.2021`.
* Modified `teal.Dataset$print` method for a less cluttered output.
* Transferred data hashing step in `TealDataset` and `MAETealDataset` to `teal`.

# teal.data 0.1.2

Expand Down
7 changes: 4 additions & 3 deletions R/MAETealDataset.R
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,6 @@ MAETealDataset <- R6::R6Class( # nolint
self$set_vars(vars)
self$set_dataset_label(label)
self$set_keys(keys)
private$calculate_hash()

# needed if recreating dataset - we need to preserve code order and uniqueness
private$code <- CodeClass$new()
Expand All @@ -96,7 +95,9 @@ MAETealDataset <- R6::R6Class( # nolint
#' `TRUE` if the dataset generated from evaluating the
#' `get_code()` code is identical to the raw data, else `FALSE`.
check = function() {
logger::log_trace("TealDataset$check executing the code to reproduce dataset: { deparse1(self$get_dataname()) }...")
logger::log_trace(
"TealDataset$check executing the code to reproduce dataset: { deparse1(self$get_dataname()) }..."
)
if (!checkmate::test_character(self$get_code(), len = 1, pattern = "\\w+")) {
stop(
sprintf(
Expand Down Expand Up @@ -274,7 +275,7 @@ MAETealDataset <- R6::R6Class( # nolint
#' mae_d$get_code()
#' mae_d$get_raw_data()
#' @export
dataset.MultiAssayExperiment <- function(dataname,
dataset.MultiAssayExperiment <- function(dataname, # nolint
x,
keys = character(0),
label = data_label(x),
Expand Down
42 changes: 20 additions & 22 deletions R/TealDataset.R
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,6 @@ TealDataset <- R6::R6Class( # nolint
self$set_vars(vars)
self$set_dataset_label(label)
self$set_keys(keys)
private$calculate_hash()

# needed if recreating dataset - we need to preserve code order and uniqueness
private$code <- CodeClass$new()
Expand Down Expand Up @@ -256,12 +255,6 @@ TealDataset <- R6::R6Class( # nolint
private$join_keys
},
#' @description
#' Gets the hash of the raw data held by this object; the hash is a string
#' produced with the MD5 algorithm.
#' @return `character` hash of the raw data
get_hash = function() {
  return(private$data_hash)
},
#' @description
#' Get the list of dependencies that are `TealDataset` or `TealDatasetConnector` objects
#'
#' @return `list`
Expand Down Expand Up @@ -303,7 +296,9 @@ TealDataset <- R6::R6Class( # nolint
common_mutate_vars <- intersect(names(datasets), names(private$mutate_vars))
private$mutate_vars[common_mutate_vars] <- datasets[common_mutate_vars]

logger::log_trace("TealDataset$reassign_datasets_vars reassigned vars for dataset: { deparse1(self$get_dataname()) }.")
logger::log_trace(
"TealDataset$reassign_datasets_vars reassigned vars for dataset: { deparse1(self$get_dataname()) }."
)
invisible(NULL)
},
#' @description
Expand All @@ -316,7 +311,9 @@ TealDataset <- R6::R6Class( # nolint
checkmate::assert_character(label, max.len = 1, any.missing = FALSE)
private$dataset_label <- label

logger::log_trace("TealDataset$set_dataset_label dataset_label set for dataset: { deparse1(self$get_dataname()) }.")
logger::log_trace(
"TealDataset$set_dataset_label dataset_label set for dataset: { deparse1(self$get_dataname()) }."
)
return(invisible(self))
},
#' @description
Expand Down Expand Up @@ -360,7 +357,10 @@ TealDataset <- R6::R6Class( # nolint
mutate_join_keys = function(dataset, val) {
  # Delegate to the join-keys object: update the keys stored between this
  # dataset (`private$dataname`) and `dataset`, passing `val` through unchanged.
  self$get_join_keys()$mutate(private$dataname, dataset, val)
  # NOTE(review): the message prefix names TealDatasetConnector although this
  # method appears in the TealDataset class hunk - confirm which is intended.
  logger::log_trace(
    # The message is split in two only to keep the line short. paste0() adds
    # no separator, so the first piece must end with a space; without it the
    # log entry read "modified keysof <dataname>".
    paste0(
      "TealDatasetConnector$mutate_join_keys join_keys modified keys ",
      "of { deparse1(self$get_dataname()) } against { dataset }."
    )
  )
  # Return the object invisibly so that calls can be chained.
  return(invisible(self))
},
Expand Down Expand Up @@ -523,7 +523,9 @@ TealDataset <- R6::R6Class( # nolint
#' `TRUE` if the dataset generated from evaluating the
#' `get_code()` code is identical to the raw data, else `FALSE`.
check = function() {
logger::log_trace("TealDataset$check executing the code to reproduce dataset: { deparse1(self$get_dataname()) }...")
logger::log_trace(
"TealDataset$check executing the code to reproduce dataset: { deparse1(self$get_dataname()) }..."
)
if (!checkmate::test_character(self$get_code(), len = 1, pattern = "\\w+")) {
stop(
sprintf(
Expand Down Expand Up @@ -596,7 +598,6 @@ TealDataset <- R6::R6Class( # nolint
.keys = character(0),
mutate_code = list(),
mutate_vars = list(),
data_hash = character(0),
join_keys = NULL,

## __Private Methods ====
Expand All @@ -614,7 +615,9 @@ TealDataset <- R6::R6Class( # nolint
return(invisible(self))
},
mutate_eager = function() {
logger::log_trace("TealDatasetConnector$mutate_eager executing mutate code for dataset: { deparse1(self$get_dataname()) }...")
logger::log_trace(
"TealDatasetConnector$mutate_eager executing mutate code for dataset: { deparse1(self$get_dataname()) }..."
)
new_df <- private$execute_code(
code = private$mutate_list_to_code_class(),
vars = c(
Expand All @@ -628,7 +631,7 @@ TealDataset <- R6::R6Class( # nolint

# code set after successful evaluation
# otherwise code != dataset
# private$code$append(private$mutate_code)
# private$code$append(private$mutate_code) # nolint
private$append_mutate_code()
self$set_vars(private$mutate_vars)
private$mutate_code <- list()
Expand All @@ -642,7 +645,9 @@ TealDataset <- R6::R6Class( # nolint
vars = list()
)

logger::log_trace("TealDatasetConnector$mutate_eager executed mutate code for dataset: { deparse1(self$get_dataname()) }.")
logger::log_trace(
"TealDatasetConnector$mutate_eager executed mutate code for dataset: { deparse1(self$get_dataname()) }."
)

new_self
},
Expand Down Expand Up @@ -788,13 +793,6 @@ TealDataset <- R6::R6Class( # nolint
return(new_set)
},

# Computes the MD5 digest of the raw data of this TealDataset and stores it
# in `private$data_hash`.
# @return NULL
calculate_hash = function() {
  raw_data <- self$get_raw_data()
  private$data_hash <- digest::digest(raw_data, algo = "md5")
  NULL
},

# Set the name for the dataset
# @param dataname (`character`) the new name
# @return self invisibly for chaining
Expand Down
1 change: 0 additions & 1 deletion man/CDISCTealDataset.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 0 additions & 15 deletions man/dataset_class.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 0 additions & 10 deletions tests/testthat/test-TealData.R
Original file line number Diff line number Diff line change
Expand Up @@ -147,16 +147,6 @@ test_that("deep clone", {
expect_false(rlang::is_reference(x$get_join_keys(), x_copy$get_join_keys()))
})

testthat::test_that("The hashes of TealDatasets objects are correct after mutating the TealData object", {
  # Expected value: MD5 digest of iris with the extra column added by hand.
  expected_data <- iris
  expected_data$test <- 1
  expected_hash <- digest::digest(expected_data, algo = "md5")

  # Apply the same change through the TealData object.
  teal_data_obj <- teal_data(dataset("iris", iris))
  mutate_data(teal_data_obj, code = "iris$test <- 1")
  teal_data_obj$execute_mutate()

  actual_hash <- teal_data_obj$get_dataset("iris")$get_hash()
  testthat::expect_equal(actual_hash, expected_hash)
})

testthat::test_that("execute_mutate returns current datasets if no mutate_code", {
pull_fun <- callable_function(data.frame)
pull_fun$set_args(args = list(head_letters = head(letters)))
Expand Down
14 changes: 0 additions & 14 deletions tests/testthat/test-TealDataset.R
Original file line number Diff line number Diff line change
Expand Up @@ -445,12 +445,6 @@ testthat::test_that("TealDataset$check returns FALSE if the passed code creates
testthat::expect_false(test_ds0$check())
})

testthat::test_that("get_hash returns the hash of the object passed to the constructor", {
  # The reported hash is compared with the MD5 digest of the untouched input.
  expected_hash <- digest::digest(iris, algo = "md5")
  iris_dataset <- TealDataset$new("iris", iris)
  testthat::expect_equal(iris_dataset$get_hash(), expected_hash)
})

testthat::test_that("get_code_class returns the correct CodeClass object", {
cc1 <- CodeClass$new(code = "iris <- head(iris)", dataname = "iris")
cc2 <- CodeClass$new(code = "mtcars <- head(mtcars)", dataname = "mtcars", deps = "iris")
Expand Down Expand Up @@ -758,14 +752,6 @@ test_that("mutate_dataset with vars argument", {
)
})

testthat::test_that("get_hash returns the correct hash after mutating the TealDataset object", {
  # Expected value: MD5 digest of iris with the extra column added by hand.
  expected_data <- iris
  expected_data$test <- 1
  expected_hash <- digest::digest(expected_data, algo = "md5")

  # Same call as the piped form, written as a direct function call.
  mutated_dataset <- mutate_dataset(TealDataset$new("iris", iris), "iris$test <- 1")
  testthat::expect_equal(mutated_dataset$get_hash(), expected_hash)
})

testthat::test_that("dataset$merge_join_keys does not throw on basic input", {
dataset1 <- TealDataset$new("iris", head(iris))
dataset1$set_join_keys(join_key("iris", "other_dataset", c("Species" = "some_col")))
Expand Down

0 comments on commit 71bf77c

Please sign in to comment.