From 71bf77c045db01b033acf1479250eee5577a1d4f Mon Sep 17 00:00:00 2001 From: Mahmoud Hallal <86970066+mhallal1@users.noreply.github.com> Date: Fri, 11 Nov 2022 15:57:09 +0100 Subject: [PATCH] remove hashing (#119) related to https://github.com/insightsengineering/teal/issues/751 remove datasets hashing and transfer it to `teal`. Co-authored-by: 27856297+dependabot-preview[bot]@users.noreply.github.com <27856297+dependabot-preview[bot]@users.noreply.github.com> --- NEWS.md | 1 + R/MAETealDataset.R | 7 +++--- R/TealDataset.R | 42 +++++++++++++++---------------- man/CDISCTealDataset.Rd | 1 - man/dataset_class.Rd | 15 ----------- tests/testthat/test-TealData.R | 10 -------- tests/testthat/test-TealDataset.R | 14 ----------- 7 files changed, 25 insertions(+), 65 deletions(-) diff --git a/NEWS.md b/NEWS.md index d7ea60a0c..db4208fe6 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,7 @@ * Examples now use `scda.2022` instead of `scda.2021`. * Modified `teal.Dataset$print` method for a less cluttered output. +* Transferred data hashing step in `TealDataset` and `MAETealDataset` to `teal`. # teal.data 0.1.2 diff --git a/R/MAETealDataset.R b/R/MAETealDataset.R index 2f2da6492..20c9f4594 100644 --- a/R/MAETealDataset.R +++ b/R/MAETealDataset.R @@ -75,7 +75,6 @@ MAETealDataset <- R6::R6Class( # nolint self$set_vars(vars) self$set_dataset_label(label) self$set_keys(keys) - private$calculate_hash() # needed if recreating dataset - we need to preserve code order and uniqueness private$code <- CodeClass$new() @@ -96,7 +95,9 @@ MAETealDataset <- R6::R6Class( # nolint #' `TRUE` if the dataset generated from evaluating the #' `get_code()` code is identical to the raw data, else `FALSE`. check = function() { - logger::log_trace("TealDataset$check executing the code to reproduce dataset: { deparse1(self$get_dataname()) }...") + logger::log_trace( + "TealDataset$check executing the code to reproduce dataset: { deparse1(self$get_dataname()) }..." + ) if (!checkmate::test_character(self$get_code(), len = 1, pattern = "\\w+")) { stop( sprintf( @@ -274,7 +275,7 @@ MAETealDataset <- R6::R6Class( # nolint #' mae_d$get_code() #' mae_d$get_raw_data() #' @export -dataset.MultiAssayExperiment <- function(dataname, +dataset.MultiAssayExperiment <- function(dataname, # nolint x, keys = character(0), label = data_label(x), diff --git a/R/TealDataset.R b/R/TealDataset.R index 63e6fec59..5b6bd0aff 100644 --- a/R/TealDataset.R +++ b/R/TealDataset.R @@ -73,7 +73,6 @@ TealDataset <- R6::R6Class( # nolint self$set_vars(vars) self$set_dataset_label(label) self$set_keys(keys) - private$calculate_hash() # needed if recreating dataset - we need to preserve code order and uniqueness private$code <- CodeClass$new() @@ -256,12 +255,6 @@ TealDataset <- R6::R6Class( # nolint private$join_keys }, #' @description - #' Returns the string representation of the raw data hashed with the MD5 hash algorithm. - #' @return `character` the hash of the raw data - get_hash = function() { - private$data_hash - }, - #' @description #' Get the list of dependencies that are `TealDataset` or `TealDatasetConnector` objects #' #' @return `list` @@ -303,7 +296,9 @@ TealDataset <- R6::R6Class( # nolint common_mutate_vars <- intersect(names(datasets), names(private$mutate_vars)) private$mutate_vars[common_mutate_vars] <- datasets[common_mutate_vars] - logger::log_trace("TealDataset$reassign_datasets_vars reassigned vars for dataset: { deparse1(self$get_dataname()) }.") + logger::log_trace( + "TealDataset$reassign_datasets_vars reassigned vars for dataset: { deparse1(self$get_dataname()) }." + ) invisible(NULL) }, #' @description @@ -316,7 +311,9 @@ TealDataset <- R6::R6Class( # nolint checkmate::assert_character(label, max.len = 1, any.missing = FALSE) private$dataset_label <- label - logger::log_trace("TealDataset$set_dataset_label dataset_label set for dataset: { deparse1(self$get_dataname()) }.") + logger::log_trace( + "TealDataset$set_dataset_label dataset_label set for dataset: { deparse1(self$get_dataname()) }." + ) return(invisible(self)) }, #' @description @@ -360,7 +357,10 @@ TealDataset <- R6::R6Class( # nolint mutate_join_keys = function(dataset, val) { self$get_join_keys()$mutate(private$dataname, dataset, val) logger::log_trace( - "TealDatasetConnector$mutate_join_keys join_keys modified keys of { deparse1(self$get_dataname()) } against { dataset }." + paste0( + "TealDatasetConnector$mutate_join_keys join_keys modified keys", + "of { deparse1(self$get_dataname()) } against { dataset }." + ) ) return(invisible(self)) }, @@ -523,7 +523,9 @@ TealDataset <- R6::R6Class( # nolint #' `TRUE` if the dataset generated from evaluating the #' `get_code()` code is identical to the raw data, else `FALSE`. check = function() { - logger::log_trace("TealDataset$check executing the code to reproduce dataset: { deparse1(self$get_dataname()) }...") + logger::log_trace( + "TealDataset$check executing the code to reproduce dataset: { deparse1(self$get_dataname()) }..." + ) if (!checkmate::test_character(self$get_code(), len = 1, pattern = "\\w+")) { stop( sprintf( @@ -596,7 +598,6 @@ TealDataset <- R6::R6Class( # nolint .keys = character(0), mutate_code = list(), mutate_vars = list(), - data_hash = character(0), join_keys = NULL, ## __Private Methods ==== @@ -614,7 +615,9 @@ TealDataset <- R6::R6Class( # nolint return(invisible(self)) }, mutate_eager = function() { - logger::log_trace("TealDatasetConnector$mutate_eager executing mutate code for dataset: { deparse1(self$get_dataname()) }...") + logger::log_trace( + "TealDatasetConnector$mutate_eager executing mutate code for dataset: { deparse1(self$get_dataname()) }..." + ) new_df <- private$execute_code( code = private$mutate_list_to_code_class(), vars = c( @@ -628,7 +631,7 @@ TealDataset <- R6::R6Class( # nolint # code set after successful evaluation # otherwise code != dataset - # private$code$append(private$mutate_code) + # private$code$append(private$mutate_code) # nolint private$append_mutate_code() self$set_vars(private$mutate_vars) private$mutate_code <- list() @@ -642,7 +645,9 @@ TealDataset <- R6::R6Class( # nolint vars = list() ) - logger::log_trace("TealDatasetConnector$mutate_eager executed mutate code for dataset: { deparse1(self$get_dataname()) }.") + logger::log_trace( + "TealDatasetConnector$mutate_eager executed mutate code for dataset: { deparse1(self$get_dataname()) }." + ) new_self }, @@ -788,13 +793,6 @@ TealDataset <- R6::R6Class( # nolint return(new_set) }, - # Calculates the MD5 hash of the raw data stored in this TealDataset. - # @return NULL - calculate_hash = function() { - private$data_hash <- digest::digest(self$get_raw_data(), algo = "md5") - NULL - }, - # Set the name for the dataset # @param dataname (`character`) the new name # @return self invisibly for chaining diff --git a/man/CDISCTealDataset.Rd b/man/CDISCTealDataset.Rd index be5a18f83..b7c02119b 100644 --- a/man/CDISCTealDataset.Rd +++ b/man/CDISCTealDataset.Rd @@ -57,7 +57,6 @@ x$get_parent()
teal.data::TealDataset$get_dataset()
teal.data::TealDataset$get_dataset_label()
teal.data::TealDataset$get_factor_colnames()
teal.data::TealDataset$get_hash()
teal.data::TealDataset$get_join_keys()
teal.data::TealDataset$get_keys()
teal.data::TealDataset$get_metadata()
teal.data::TealDataset$get_dataset()
teal.data::TealDataset$get_dataset_label()
teal.data::TealDataset$get_factor_colnames()
teal.data::TealDataset$get_hash()
teal.data::TealDataset$get_join_keys()
teal.data::TealDataset$get_keys()
teal.data::TealDataset$get_metadata()