diff --git a/DESCRIPTION b/DESCRIPTION index 848bad2..8c68d80 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: ResultModelManager Title: Result Model Manager (RMM) for OHDSI packages -Version: 0.1.0 +Version: 0.1.1 Authors@R: person("Jamie", "Gilbert", , "gilbert@ohdsi.org", role = c("aut", "cre")) Description: Database data model management utilities for OHDSI packages. @@ -17,7 +17,8 @@ Imports: ParallelLogger, checkmate, DBI, - pool + pool, + readr Suggests: testthat (>= 3.0.0), RSQLite, diff --git a/NAMESPACE b/NAMESPACE index c99495f..f1e51f7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,6 +3,7 @@ export(ConnectionHandler) export(DataMigrationManager) export(PooledConnectionHandler) +export(generateSqlSchema) import(DatabaseConnector) import(R6) import(checkmate) @@ -13,3 +14,4 @@ importFrom(SqlRender,render) importFrom(SqlRender,translate) importFrom(pool,dbPool) importFrom(pool,poolClose) +importFrom(readr,read_csv) diff --git a/NEWS.md b/NEWS.md index 467788f..964e8a1 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,10 @@ -# ResultModelManager 0.0.1 +# ResultModelManager 0.1.1 + +Changes: +1. Added snakeCaseToCamelCase parameter to public in connectionHandlers so it can be defined once if required + +2. Added schema generator function that creates sql from csv files with table defs + +# ResultModelManager 0.1.0 Initial version \ No newline at end of file diff --git a/R/ConnectionHandler.R b/R/ConnectionHandler.R index 975c92b..5a3726a 100644 --- a/R/ConnectionHandler.R +++ b/R/ConnectionHandler.R @@ -21,7 +21,8 @@ #' #' @field connectionDetails DatabaseConnector connectionDetails object #' @field con DatabaseConnector connection object -#' @field isActive Is connection active or not +#' @field isActive Is connection active or not#' +#' @field snakeCaseToCamelCase (Optional) Boolean. 
return the results columns in camel case (default) #' #' @import checkmate #' @import R6 @@ -35,13 +36,15 @@ ConnectionHandler <- R6::R6Class( connectionDetails = NULL, con = NULL, isActive = FALSE, + snakeCaseToCamelCase = TRUE, #' #' @param connectionDetails DatabaseConnector::connectionDetails class #' @param loadConnection Boolean option to load connection right away - initialize = function(connectionDetails, loadConnection = TRUE) { + #' @param snakeCaseToCamelCase (Optional) Boolean. return the results columns in camel case (default) + initialize = function(connectionDetails, loadConnection = TRUE, snakeCaseToCamelCase = TRUE) { checkmate::assertClass(connectionDetails, "connectionDetails") self$connectionDetails <- connectionDetails - + self$snakeCaseToCamelCase <- snakeCaseToCamelCase if (loadConnection) { self$initConnection() } @@ -75,11 +78,13 @@ ConnectionHandler <- R6::R6Class( #' Connects automatically if it isn't yet loaded #' @returns DatabaseConnector Connection instance getConnection = function() { - if (is.null(self$con)) + if (is.null(self$con)) { self$initConnection() + } - if (!self$dbIsValid()) + if (!self$dbIsValid()) { self$initConnection() + } return(self$con) }, @@ -128,14 +133,14 @@ ConnectionHandler <- R6::R6Class( #' You may wish to ignore it. #' @param ... Additional query parameters #' @returns boolean TRUE if connection is valid - queryDb = function(sql, snakeCaseToCamelCase = TRUE, overrideRowLimit = FALSE, ...) { + queryDb = function(sql, snakeCaseToCamelCase = self$snakeCaseToCamelCase, overrideRowLimit = FALSE, ...) { # Limit row count is intended for web applications that may cause a denial of service if they consume too many # resources. 
limitRowCount <- as.integer(Sys.getenv("LIMIT_ROW_COUNT")) if (!is.na(limitRowCount) & limitRowCount > 0 & !overrideRowLimit) { sql <- SqlRender::render("SELECT TOP @limit_row_count * FROM (@query) result;", - query = gsub(";$", "", sql), # Remove last semi-colon - limit_row_count = limitRowCount + query = gsub(";$", "", sql), # Remove last semi-colon + limit_row_count = limitRowCount ) } sql <- self$renderTranslateSql(sql, ...) @@ -186,7 +191,7 @@ ConnectionHandler <- R6::R6Class( #' Does not translate or render sql. #' @param sql sql query string #' @param snakeCaseToCamelCase (Optional) Boolean. return the results columns in camel case (default) - queryFunction = function(sql, snakeCaseToCamelCase = TRUE) { + queryFunction = function(sql, snakeCaseToCamelCase = self$snakeCaseToCamelCase) { DatabaseConnector::querySql(self$getConnection(), sql, snakeCaseToCamelCase = snakeCaseToCamelCase) }, @@ -200,4 +205,3 @@ ConnectionHandler <- R6::R6Class( } ) ) - diff --git a/R/DataMigrationManager.R b/R/DataMigrationManager.R index eb5ebb7..94ed384 100644 --- a/R/DataMigrationManager.R +++ b/R/DataMigrationManager.R @@ -213,14 +213,14 @@ DataMigrationManager <- R6::R6Class( # load list of migrations migrations <- self$getStatus() # execute migrations that haven't been executed yet - migrations <- migrations[!migrations$executed,] + migrations <- migrations[!migrations$executed, ] if (nrow(migrations) > 0) { if (is.null(stopMigrationVersion)) { stopMigrationVersion <- max(migrations$migrationOrder) } for (i in 1:nrow(migrations)) { - migration <- migrations[i,] + migration <- migrations[i, ] if (isTRUE(migration$migrationOrder <= stopMigrationVersion)) { private$executeMigration(migration) } @@ -250,10 +250,11 @@ DataMigrationManager <- R6::R6Class( # Load, render, translate and execute sql if (self$isPackage()) { sql <- SqlRender::loadRenderTranslateSql(file.path(self$migrationPath, migration$migrationFile), - dbms = private$connectionDetails$dbms, - database_schema = 
self$databaseSchema, - table_prefix = self$tablePrefix, - packageName = self$packageName) + dbms = private$connectionDetails$dbms, + database_schema = self$databaseSchema, + table_prefix = self$tablePrefix, + packageName = self$packageName + ) private$connectionHandler$executeSql(sql) } else { # Check to see if a file for database platform exists @@ -264,8 +265,9 @@ DataMigrationManager <- R6::R6Class( sql <- SqlRender::readSql(file.path(self$migrationPath, "sql_server", migration$migrationFile)) } private$connectionHandler$executeSql(sql, - database_schema = self$databaseSchema, - table_prefix = self$tablePrefix) + database_schema = self$databaseSchema, + table_prefix = self$tablePrefix + ) } private$logInfo("Saving migration: ", migration$migrationFile) # Save migration in set of migrations @@ -275,10 +277,10 @@ DataMigrationManager <- R6::R6Class( VALUES ('@migration_file', @order); " private$connectionHandler$executeSql(iSql, - database_schema = self$databaseSchema, - migration_file = migration$migrationFile, - table_prefix = self$tablePrefix, - order = migration$migrationOrder + database_schema = self$databaseSchema, + migration_file = migration$migrationFile, + table_prefix = self$tablePrefix, + order = migration$migrationOrder ) private$logInfo("Migration complete ", migration$migrationFile) }, @@ -293,8 +295,9 @@ DataMigrationManager <- R6::R6Class( );" private$connectionHandler$executeSql(sql, - database_schema = self$databaseSchema, - table_prefix = self$tablePrefix) + database_schema = self$databaseSchema, + table_prefix = self$tablePrefix + ) private$logInfo("Migrations table created") }, getCompletedMigrations = function() { @@ -306,8 +309,9 @@ DataMigrationManager <- R6::R6Class( {DEFAULT @migration = migration} SELECT migration_file, migration_order FROM @database_schema.@table_prefix@migration ORDER BY migration_order;" migrationsExecuted <- private$connectionHandler$queryDb(sql, - database_schema = self$databaseSchema, - table_prefix = 
self$tablePrefix) + database_schema = self$databaseSchema, + table_prefix = self$tablePrefix + ) return(migrationsExecuted) }, @@ -329,7 +333,6 @@ DataMigrationManager <- R6::R6Class( ParallelLogger::logError(...) } }, - logInfo = function(...) { if (isUnitTest() | isRmdCheck()) { writeLines(text = .makeMessage(...)) diff --git a/R/PooledConnectionHandler.R b/R/PooledConnectionHandler.R index e117cfd..ed91c7d 100644 --- a/R/PooledConnectionHandler.R +++ b/R/PooledConnectionHandler.R @@ -70,7 +70,7 @@ PooledConnectionHandler <- R6::R6Class( #' Overrides ConnectionHandler Call. Does not translate or render sql. #' @param sql sql query string #' @param snakeCaseToCamelCase (Optional) Boolean. return the results columns in camel case (default) - queryFunction = function(sql, snakeCaseToCamelCase = TRUE) { + queryFunction = function(sql, snakeCaseToCamelCase = self$snakeCaseToCamelCase) { data <- DatabaseConnector::dbGetQuery(self$getConnection(), sql) if (snakeCaseToCamelCase) { colnames(data) <- SqlRender::snakeCaseToCamelCase(colnames(data)) diff --git a/R/SchemaGenerator.R b/R/SchemaGenerator.R new file mode 100644 index 0000000..7feb3dc --- /dev/null +++ b/R/SchemaGenerator.R @@ -0,0 +1,92 @@ +# Copyright 2022 Observational Health Data Sciences and Informatics +# +# This file is part of CohortDiagnostics +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +.writeFieldDefinition <- function(field) { + field <- as.list(field) + str <- paste("\t", field$columnName, toupper(field$dataType)) + + if (field$primaryKey == "yes") { + str <- paste(str, "NOT NULL") + } + + str +} + +#' Schema generator +#' @description +#' Take a csv schema definition and create a basic sql script with it. +#' +#' @param csvFilepath Path to schema file. Csv file must have the columns: +#' "table_name", "column_name", "data_type", "is_required", "primary_key" +#' Note - +#' @param sqlOutputPath File to write sql to. +#' @param overwrite Boolean - overwrite existing file? +#' @export +#' +#' @importFrom readr read_csv +#' @return +#' string containing the sql for the table +generateSqlSchema <- function(csvFilepath, + sqlOutputPath = NULL, + overwrite = FALSE) { + if (!is.null(sqlOutputPath) && (file.exists(sqlOutputPath) & !overwrite)) { + stop("Output file ", sqlOutputPath, "already exists. Set overwrite = TRUE to continue") + } + + checkmate::assertFileExists(csvFilepath) + schemaDefinition <- readr::read_csv(csvFilepath, show_col_types = FALSE) + colnames(schemaDefinition) <- SqlRender::snakeCaseToCamelCase(colnames(schemaDefinition)) + requiredFields <- c("tableName", "columnName", "dataType", "isRequired", "primaryKey") + checkmate::assertNames(colnames(schemaDefinition), must.include = requiredFields) + + tableSqlStr <- " +CREATE TABLE @database_schema.@table_prefix@table_name ( + @table_fields +); +" + fullScript <- "" + defs <- "{DEFAULT @table_prefix = ''}\n" + + for (table in unique(schemaDefinition$tableName)) { + tableFields <- schemaDefinition[schemaDefinition$tableName == table, ] + fieldDefinitions <- apply(tableFields, 1, .writeFieldDefinition) + + primaryKeyFields <- tableFields[tableFields$primaryKey == "yes", ] + if (nrow(primaryKeyFields)) { + pkeyField <- paste0("\tPRIMARY KEY(", paste(primaryKeyFields$columnName, collapse = ","), ")") + fieldDefinitions <- c(fieldDefinitions, pkeyField) + } + + fieldDefinitions <- 
paste(fieldDefinitions, collapse = ",\n") + tableString <- SqlRender::render(tableSqlStr, + table_name = paste0("@", table), + table_fields = fieldDefinitions + ) + + tableDefStr <- paste0("{DEFAULT @", table, " = ", table, "}\n") + defs <- paste0(defs, tableDefStr) + + fullScript <- paste(fullScript, tableString) + } + + # Get fields for each table + lines <- paste(defs, fullScript) + if (!is.null(sqlOutputPath)) { + writeLines(lines, sqlOutputPath) + } + + lines +} diff --git a/extras/ResultModelManager.pdf b/extras/ResultModelManager.pdf index 2181949..c11f98a 100644 Binary files a/extras/ResultModelManager.pdf and b/extras/ResultModelManager.pdf differ diff --git a/man/ConnectionHandler.Rd b/man/ConnectionHandler.Rd index e238ad5..3689df0 100644 --- a/man/ConnectionHandler.Rd +++ b/man/ConnectionHandler.Rd @@ -24,7 +24,9 @@ Allows a connection to cleanly be opened and closed and stored within class/obje \item{\code{con}}{DatabaseConnector connection object} -\item{\code{isActive}}{Is connection active or not} +\item{\code{isActive}}{Is connection active or not#'} + +\item{\code{snakeCaseToCamelCase}}{(Optional) Boolean. return the results columns in camel case (default)} } \if{html}{\out{}} } @@ -50,7 +52,11 @@ Allows a connection to cleanly be opened and closed and stored within class/obje \if{latex}{\out{\hypertarget{method-ConnectionHandler-new}{}}} \subsection{Method \code{new()}}{ \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{ConnectionHandler$new(connectionDetails, loadConnection = TRUE)}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{ConnectionHandler$new( + connectionDetails, + loadConnection = TRUE, + snakeCaseToCamelCase = TRUE +)}\if{html}{\out{
}} } \subsection{Arguments}{ @@ -58,7 +64,9 @@ Allows a connection to cleanly be opened and closed and stored within class/obje \describe{ \item{\code{connectionDetails}}{DatabaseConnector::connectionDetails class} -\item{\code{loadConnection}}{Boolean option to load connection right away +\item{\code{loadConnection}}{Boolean option to load connection right away} + +\item{\code{snakeCaseToCamelCase}}{(Optional) Boolean. return the results columns in camel case (default) Render Translate Sql.} } \if{html}{\out{}} @@ -150,7 +158,7 @@ resources. \subsection{Usage}{ \if{html}{\out{
}}\preformatted{ConnectionHandler$queryDb( sql, - snakeCaseToCamelCase = TRUE, + snakeCaseToCamelCase = self$snakeCaseToCamelCase, overrideRowLimit = FALSE, ... )}\if{html}{\out{
}} @@ -198,7 +206,10 @@ query Function} queryFunction that can be overriden with subclasses (e.g. use different base function or intercept query) Does not translate or render sql. \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{ConnectionHandler$queryFunction(sql, snakeCaseToCamelCase = TRUE)}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{ConnectionHandler$queryFunction( + sql, + snakeCaseToCamelCase = self$snakeCaseToCamelCase +)}\if{html}{\out{
}} } \subsection{Arguments}{ diff --git a/man/PooledConnectionHandler.Rd b/man/PooledConnectionHandler.Rd index f6877f9..d9a650f 100644 --- a/man/PooledConnectionHandler.Rd +++ b/man/PooledConnectionHandler.Rd @@ -23,13 +23,13 @@ Useful for long running applications that serve multiple concurrent requests. \if{html}{\out{
Inherited methods
}} @@ -78,7 +78,10 @@ query Function \subsection{Method \code{queryFunction()}}{ Overrides ConnectionHandler Call. Does not translate or render sql. \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{PooledConnectionHandler$queryFunction(sql, snakeCaseToCamelCase = TRUE)}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{PooledConnectionHandler$queryFunction( + sql, + snakeCaseToCamelCase = self$snakeCaseToCamelCase +)}\if{html}{\out{
}} } \subsection{Arguments}{ diff --git a/man/generateSqlSchema.Rd b/man/generateSqlSchema.Rd new file mode 100644 index 0000000..94e1c7a --- /dev/null +++ b/man/generateSqlSchema.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/SchemaGenerator.R +\name{generateSqlSchema} +\alias{generateSqlSchema} +\title{Schema generator} +\usage{ +generateSqlSchema(csvFilepath, sqlOutputPath = NULL, overwrite = FALSE) +} +\arguments{ +\item{csvFilepath}{Path to schema file. Csv file must have the columns: +"table_name", "colum_name", "data_type", "is_required", "primary_key" +Note -} + +\item{sqlOutputPath}{File to write sql to.} + +\item{overwrite}{Boolean - overwrite existing file?} +} +\value{ +string containing the sql for the table +} +\description{ +Take a csv schema definition and create a basic sql script with it. +} diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R index 0d6605d..6a8649e 100644 --- a/tests/testthat/setup.R +++ b/tests/testthat/setup.R @@ -4,4 +4,4 @@ sqliteFile <- tempfile(fileext = "sqlite") connectionDetails <- DatabaseConnector::createConnectionDetails("sqlite", server = sqliteFile) connection <- DatabaseConnector::connect(connectionDetails) DatabaseConnector::disconnect(connection) -options(rstudio.connectionObserver.errorsSuppressed = TRUE) \ No newline at end of file +options(rstudio.connectionObserver.errorsSuppressed = TRUE) diff --git a/tests/testthat/test-ConnectionHandler.R b/tests/testthat/test-ConnectionHandler.R index 08bbd5b..68f60e2 100644 --- a/tests/testthat/test-ConnectionHandler.R +++ b/tests/testthat/test-ConnectionHandler.R @@ -18,13 +18,11 @@ genericTests <- function(connClass, classes, connectionClass) { " DatabaseConnector::renderTranslateExecuteSql(testConnection, sql) - on.exit( - { - sql <- "DROP TABLE IF EXISTS main.concept;" - DatabaseConnector::renderTranslateExecuteSql(testConnection, sql) - DatabaseConnector::disconnect(testConnection) - } - ) + on.exit({ + 
sql <- "DROP TABLE IF EXISTS main.concept;" + DatabaseConnector::renderTranslateExecuteSql(testConnection, sql) + DatabaseConnector::disconnect(testConnection) + }) conn <- connClass$new(connectionDetails) checkmate::expect_class(conn, classes) @@ -74,12 +72,14 @@ genericTests <- function(connClass, classes, connectionClass) { test_that("Database Connector Class works", { genericTests(ConnectionHandler, - classes = c("ConnectionHandler"), - connectionClass = "DatabaseConnectorDbiConnection") + classes = c("ConnectionHandler"), + connectionClass = "DatabaseConnectorDbiConnection" + ) }) test_that("Pooled connector Class works", { genericTests(PooledConnectionHandler, - classes = c("PooledConnectionHandler", "ConnectionHandler"), - connectionClass = "Pool") + classes = c("PooledConnectionHandler", "ConnectionHandler"), + connectionClass = "Pool" + ) }) diff --git a/tests/testthat/test-SchemaGenerator.R b/tests/testthat/test-SchemaGenerator.R new file mode 100644 index 0000000..c70916e --- /dev/null +++ b/tests/testthat/test-SchemaGenerator.R @@ -0,0 +1,21 @@ +test_that("Schema gen from file", { + testCd <- DatabaseConnector::createConnectionDetails(server = "testSchema.db", dbms = "sqlite") + connection <- DatabaseConnector::connect(testCd) + tfile <- tempfile() + on.exit({ + unlink("testSchema.db") + unlink(tfile) + DatabaseConnector::disconnect(connection) + }) + + schema <- generateSqlSchema("testSchemaDef.csv", tfile) + checkmate::expect_file_exists(tfile) + + schemaDetails <- readr::read_csv("testSchemaDef.csv", show_col_types = FALSE) + checkmate::expect_string(schema) + DatabaseConnector::renderTranslateExecuteSql(connection, schema, database_schema = "main") + + for (table in schemaDetails$table_name) { + res <- DatabaseConnector::renderTranslateQuerySql(connection, "SELECT * FROM @table_name", table_name = table) + } +}) diff --git a/tests/testthat/testSchemaDef.csv b/tests/testthat/testSchemaDef.csv new file mode 100644 index 0000000..a43ce5c --- /dev/null 
+++ b/tests/testthat/testSchemaDef.csv @@ -0,0 +1,35 @@ +table_name,column_name,data_type,is_required,primary_key,min_cell_count,description +cohort_definition,cohort_definition_id,bigint,yes,yes,no,cohort definition id +cohort_definition,concept_id,bigint,yes,no,no,covariate id +cohort_definition,cohort_name,varchar,yes,no,no,cohort definition name +cohort_definition,short_name,varchar,yes,no,no,cohort definition name +cohort_definition,atc_flag,int,yes,no,no,cohort definition name +cohort_definition,database_id,bigint,yes,yes,no,database definition id +cdm_source_info,database_id,bigint,yes,yes,no,database definition id +cdm_source_info,cdm_source_abbreviation,varchar,no,no,no,cdm info +cdm_source_info,cdm_holder,varchar,no,no,no,cdm info +cdm_source_info,source_description,varchar,no,no,no,cdm info +cdm_source_info,source_documentation_reference,varchar,no,no,no,cdm info +cdm_source_info,cdm_etl_reference,varchar,no,no,no,cdm info +cdm_source_info,source_release_date,date,no,no,no,cdm info +cdm_source_info,cdm_release_date,date,no,no,no,cdm info +cdm_source_info,cdm_version,varchar,no,no,no,cdm info +cdm_source_info,vocabulary_version,varchar,no,no,no,cdm info +cohort_counts,num_persons,bigint,yes,no,yes,cohort size +cohort_counts,cohort_definition_id,bigint,yes,yes,no,cohort definition id +cohort_counts,database_id,bigint,yes,yes,no,database definition id +cosine_similarity,database_id,bigint,yes,yes,no,database definition id +cosine_similarity,cohort_definition_id_1,bigint,yes,yes,no,database definition id +cosine_similarity,cohort_definition_id_2,bigint,yes,yes,no,database definition id +cosine_similarity,covariate_type,varchar,yes,no,no,database definition id +cosine_similarity,cosine_similarity,float,yes,no,no,database definition id +covariate_definition,covariate_id,bigint,yes,no,no,covariate id +covariate_definition,covariate_name,varchar,yes,no,no,covariate name +covariate_definition,concept_id,bigint,yes,no,no,covariate id 
+covariate_definition,time_at_risk_start,int,yes,no,no,time at risk start +covariate_definition,time_at_risk_end,int,yes,no,no,time at risk end +covariate_definition,covariate_type,varchar,yes,no,no,database definition id +covariate_mean,database_id,bigint,yes,yes,no,database definition id +covariate_mean,cohort_definition_id,bigint,yes,yes,no,database definition id +covariate_mean,covariate_id,bigint,yes,yes,no,database definition id +covariate_mean,covariate_mean,float,yes,yes,no,database definition id