Merge pull request #70 from OHDSI/develop

Release 0.5.10
OHDSI · Aug 21, 2024 · c734b7e · c734b7e
2 parents d939010 + 65ebc9b
commit c734b7e
Show file tree

Hide file tree

Showing 44 changed files with 530 additions and 145 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -35,4 +35,6 @@ compare_versions
 LICENSE
 .git
 ..Rcheck
-errorReportSql.txt
+errorReportSql.txt
+^CRAN-SUBMISSION$
+^cran-comments\.md$
diff --git a/CRAN-SUBMISSION b/CRAN-SUBMISSION
@@ -0,0 +1,3 @@
+Version: 0.5.10
+Date: 2024-08-21 04:10:48 UTC
+SHA: bb001b6745dfbc303db44ef96b9eb8aaa2f67901
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: ResultModelManager
 Title: Result Model Manager
-Version: 0.5.9
+Version: 0.5.10
 Authors@R:
     person("Jamie", "Gilbert", , "gilbert@ohdsi.org", role = c("aut", "cre"))
 Description: Database data model management utilities for R packages in the Observational Health Data Sciences and
@@ -35,7 +35,6 @@ Suggests:
   testthat (>= 3.0.0),
   RSQLite,
   duckdb,
-  RPostgres,
   knitr,
   rmarkdown,
   keyring,

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,23 @@
+# ResultModelManager 0.5.10
+
+Changes:
+
+1. Using readr column types to work around issues with inconsistent type conversion between DBI and JDBC drivers.
+
+Bug fixes:
+
+1. Resolved issue where failed queries were being aborted inside the wrong connection
+in PooledConnectionHandler
+
+2. Refactored pooled connection handler to better ensure checked-out connections are returned
+
+
+# ResultModelManager 0.5.9
+
+Changes:
+
+1. More tidy cleanup of PooledConnectionHandlers to prevent leaked connections.
+
 # ResultModelManager 0.5.9
 
 Changes:

diff --git a/R/ConnectionHandler.R b/R/ConnectionHandler.R
@@ -14,6 +14,20 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# Wrap `sql` in "SELECT TOP n" when env var LIMIT_ROW_COUNT is a positive integer and
+# `overrideRowLimit` is FALSE; protects web applications from resource-exhausting queries.
+.limitRowCount <- function(sql, overrideRowLimit) {
+  limitRowCount <- as.integer(Sys.getenv("LIMIT_ROW_COUNT"))
+  # Scalar `&&` short-circuits: an unset or non-numeric limit (NA) stops the check early.
+  if (!is.na(limitRowCount) && limitRowCount > 0 && !overrideRowLimit) {
+    sql <- SqlRender::render("SELECT TOP @limit_row_count * FROM (@query) result;",
+      query = gsub(";$", "", sql), # Remove last semi-colon
+      limit_row_count = limitRowCount
+    )
+  }
+  return(sql)
+}
+
 #' ConnectionHandler
 #' @description
 #' Class for handling DatabaseConnector:connection objects with consistent R6 interfaces for pooled and non-pooled connections.
@@ -111,10 +125,6 @@ ConnectionHandler <- R6::R6Class(
     #' Connects automatically if it isn't yet loaded
     #' @returns DatabaseConnector Connection instance
     getConnection = function() {
-      if (is.null(self$con)) {
-        self$initConnection()
-      }
-
       if (!self$dbIsValid()) {
         self$initConnection()
       }
@@ -168,15 +178,7 @@ ConnectionHandler <- R6::R6Class(
     #' @param ...                                   Additional query parameters
     #' @returns boolean TRUE if connection is valid
     queryDb = function(sql, snakeCaseToCamelCase = self$snakeCaseToCamelCase, overrideRowLimit = FALSE, ...) {
-      # Limit row count is intended for web applications that may cause a denial of service if they consume too many
-      # resources.
-      limitRowCount <- as.integer(Sys.getenv("LIMIT_ROW_COUNT"))
-      if (!is.na(limitRowCount) & limitRowCount > 0 & !overrideRowLimit) {
-        sql <- SqlRender::render("SELECT TOP @limit_row_count * FROM (@query) result;",
-          query = gsub(";$", "", sql), # Remove last semi-colon
-          limit_row_count = limitRowCount
-        )
-      }
+      sql <- .limitRowCount(sql, overrideRowLimit)
       sql <- self$renderTranslateSql(sql, ...)
 
       tryCatch(
@@ -203,7 +205,7 @@ ConnectionHandler <- R6::R6Class(
 
       tryCatch(
         {
-          data <- self$executeFunction(sql)
+          self$executeFunction(sql)
         },
         error = function(error) {
           if (self$dbms() %in% c("postgresql", "redshift")) {
@@ -223,17 +225,19 @@ ConnectionHandler <- R6::R6Class(
     #' Does not translate or render sql.
     #' @param sql                                   sql query string
     #' @param snakeCaseToCamelCase                  (Optional) Boolean. return the results columns in camel case (default)
-    queryFunction = function(sql, snakeCaseToCamelCase = self$snakeCaseToCamelCase) {
-      DatabaseConnector::querySql(self$getConnection(), sql, snakeCaseToCamelCase = snakeCaseToCamelCase)
+    #' @param connection                            (Optional) connection object
+    queryFunction = function(sql, snakeCaseToCamelCase = self$snakeCaseToCamelCase, connection = self$getConnection()) {
+      DatabaseConnector::querySql(connection, sql, snakeCaseToCamelCase = snakeCaseToCamelCase)
     },
 
     #' execute Function
     #' @description
     #' exec query Function that can be overriden with subclasses (e.g. use different base function or intercept query)
     #' Does not translate or render sql.
     #' @param sql                                   sql query string
-    executeFunction = function(sql) {
-      DatabaseConnector::executeSql(self$getConnection(), sql)
+    #' @param connection                            (Optional) connection object
+    executeFunction = function(sql, connection = self$getConnection()) {
+      DatabaseConnector::executeSql(connection, sql)
     }
   )
 )
diff --git a/R/DataModel.R b/R/DataModel.R
@@ -110,7 +110,7 @@ checkAndFixDataTypes <-
           table <- dplyr::mutate_at(table, i, as.numeric)
         }
       } else if (expectedType == "int") {
-        if (observedTypes[i] != "integer") {
+        if (!observedTypes[i] %in% c("integer", "numeric")) {
           ParallelLogger::logDebug(
             sprintf(
               "Column %s in table %s in results folder %s is of type %s, but was expecting %s. Attempting to convert.",
@@ -121,7 +121,7 @@ checkAndFixDataTypes <-
               expectedType
             )
           )
-          table <- dplyr::mutate_at(table, i, as.integer)
+          table <- dplyr::mutate_at(table, i, as.numeric)
         }
       } else if (expectedType == "varchar") {
         if (observedTypes[i] != "character") {
@@ -423,12 +423,14 @@ uploadTable <- function(tableName,
                         purgeSiteDataBeforeUploading,
                         warnOnMissingTable) {
   csvFileName <- paste0(tableName, ".csv")
+  specifications <- specifications %>%
+    dplyr::filter(.data$tableName == !!tableName)
+
   if (csvFileName %in% list.files(resultsFolder)) {
     rlang::inform(paste0("Uploading file: ", csvFileName, " to table: ", tableName))
 
     primaryKey <- specifications %>%
-      dplyr::filter(.data$tableName == !!tableName &
-        tolower(.data$primaryKey) == "yes") %>%
+      dplyr::filter(tolower(.data$primaryKey) == "yes") %>%
       dplyr::select("columnName") %>%
       dplyr::pull()
 
@@ -443,8 +445,7 @@ uploadTable <- function(tableName,
     env$purgeSiteDataBeforeUploading <- purgeSiteDataBeforeUploading
     if (purgeSiteDataBeforeUploading && "database_id" %in% primaryKey) {
       type <- specifications %>%
-        dplyr::filter(.data$tableName == !!tableName &
-          .data$columnName == "database_id") %>%
+        dplyr::filter(.data$columnName == "database_id") %>%
         dplyr::select("dataType") %>%
         dplyr::pull()
       # Remove the existing data for the databaseId
@@ -477,12 +478,32 @@ uploadTable <- function(tableName,
       env$primaryKeyValuesInDb <- primaryKeyValuesInDb
     }
 
+    # Strip the parenthesised size from each data type, e.g. "varchar(255)" -> "varchar"
+    types <- sub(" ", "", sub("\\(.*\\)", "", specifications$dataType))
+
+    # Convert the types to readr's col_types format
+    convertType <- Vectorize(
+      function(type) {
+        switch(type,
+          varchar = "c",
+          bigint = "n",
+          int = "n",
+          date = "D",
+          "?"
+        ) # default to guess if type not matched
+      }
+    )
+
+    types <- convertType(types)
+    # Create a named vector of column types
+    names(types) <- specifications$columnName
+    colTypes <- do.call(readr::cols, as.list(types))
 
     readr::read_csv_chunked(
       file = file.path(resultsFolder, csvFileName),
       callback = function(chunk, pos) uploadChunk(chunk, pos, env, specifications, resultsFolder, connection, runCheckAndFixCommands, forceOverWriteOfSpecifications),
       chunk_size = 1e7,
-      col_types = readr::cols(),
+      col_types = colTypes,
       guess_max = 1e6,
       progress = FALSE
     )