diff --git a/.Rbuildignore b/.Rbuildignore index 47b389c8..fc84cfbe 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -2,3 +2,4 @@ ^\.Rproj\.user$ ^.travis.yml$ deploy.sh +^\.github$ diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000..57ce3476 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,4 @@ +inst/doc/* linguist-documentation +vignettes/* linguist-documentation +man/* linguist-documentation +extras/* linguist-documentation diff --git a/DESCRIPTION b/DESCRIPTION index aec82c4f..53427379 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,23 +1,25 @@ Package: Achilles Type: Package -Title: Creates descriptive statistics summary for an entire OMOP CDM instance -Version: 1.6.1 -Date: 2018-10-02 +Title: Creates Descriptive Statistics Summary for an Entire OMOP CDM Instance +Version: 1.6.2 +Date: 2018-10-04 Author: Patrick Ryan, Martijn Schuemie, Vojtech Huser, Chris Knoll, Ajit Londhe Maintainer: Patrick Ryan LazyData: true -Description: creates descriptive statistics summary for an entire OMOP CDM +Description: Creates descriptive statistics summary for an entire OMOP CDM instance. Since Aug 2016 only CDM v5 is actively being extended. Achilles Heel component does data quality assesment. 
Depends: SqlRender, DatabaseConnector (>= 2.0.0), - rjson, - OhdsiRTools + rjson Suggests: testthat, dplyr, - R.utils + R.utils, + OhdsiRTools +Remotes: + OHDSI/OhdsiRTools License: Apache License Roxygen: list(wrap = FALSE) RoxygenNote: 6.1.0 diff --git a/NAMESPACE b/NAMESPACE index 9ac04c14..7abd991e 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -27,3 +27,10 @@ export(fetchAchillesHeelResults) export(getAnalysisDetails) export(showReportTypes) export(validateSchema) +import(DatabaseConnector) +import(SqlRender) +import(rjson) +importFrom(utils,compareVersion) +importFrom(utils,packageVersion) +importFrom(utils,read.csv) +importFrom(utils,zip) diff --git a/R/Achilles-package.R b/R/Achilles-package.R old mode 100755 new mode 100644 index 1849650a..0918636a --- a/R/Achilles-package.R +++ b/R/Achilles-package.R @@ -1,19 +1,5 @@ -#' Achilles -#' -#' @name Achilles -#' @docType package -NULL - - - - -#' allReports -#' -#' A list of reports -#' -#' -#' @docType data -#' @keywords datasets -#' @name allReports - +#' @import DatabaseConnector +#' @import SqlRender +#' @import rjson +#' @importFrom utils compareVersion packageVersion read.csv zip NULL diff --git a/R/Achilles.R b/R/Achilles.R index 6ae3f18d..69d8fe53 100755 --- a/R/Achilles.R +++ b/R/Achilles.R @@ -8,7 +8,7 @@ # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# https://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, @@ -74,28 +74,28 @@ #' numThreads = 10) #' } #' @export -achilles <- function (connectionDetails, - cdmDatabaseSchema, - oracleTempSchema = cdmDatabaseSchema, - resultsDatabaseSchema = cdmDatabaseSchema, - scratchDatabaseSchema = resultsDatabaseSchema, - vocabDatabaseSchema = cdmDatabaseSchema, - sourceName = "", - analysisIds, - createTable = TRUE, - smallCellCount = 5, - cdmVersion = "5", - runHeel = TRUE, - validateSchema = FALSE, - runCostAnalysis = FALSE, - conceptHierarchy = TRUE, - createIndices = TRUE, - numThreads = 1, - tempAchillesPrefix = "tmpach", - dropScratchTables = TRUE, - sqlOnly = FALSE, - outputFolder = "output", - logMultiThreadPerformance = FALSE) { +achilles <- function(connectionDetails, + cdmDatabaseSchema, + # oracleTempSchema = cdmDatabaseSchema, # Unused arg + resultsDatabaseSchema = cdmDatabaseSchema, + scratchDatabaseSchema = resultsDatabaseSchema, + vocabDatabaseSchema = cdmDatabaseSchema, + sourceName = "", + analysisIds, + createTable = TRUE, + smallCellCount = 5, + cdmVersion = "5", + runHeel = TRUE, + validateSchema = FALSE, + runCostAnalysis = FALSE, + conceptHierarchy = TRUE, + createIndices = TRUE, + numThreads = 1, + tempAchillesPrefix = "tmpach", + dropScratchTables = TRUE, + sqlOnly = FALSE, + outputFolder = "output", + logMultiThreadPerformance = FALSE) { achillesSql <- c() @@ -190,17 +190,27 @@ achilles <- function (connectionDetails, schemaDelim <- "." 
if (numThreads == 1 || scratchDatabaseSchema == "#") { + message("Beginning single-threaded operations") + numThreads <- 1 scratchDatabaseSchema <- "#" schemaDelim <- "s_" # first invocation of the connection, to persist throughout to maintain temp tables connection <- DatabaseConnector::connect(connectionDetails = connectionDetails) + } else if (!requireNamespace("OhdsiRTools", quietly = TRUE)) { + stop( + "Multi-threading support requires package 'OhdsiRTools'.", + " Consider running single-threaded by setting", + " `numThreads = 1` and `scratchDatabaseSchema = '#'`.", + " You may install it using devtools with the following code:", + "\n devtools::install_github('OHDSI/OhdsiRTools')", + "\n\nAlternately, you might want to install ALL suggested packages using:", + "\n devtools::install_github('OHDSI/Achilles', dependencies = TRUE)", + call. = FALSE + ) } else { - if (!.is_installed("OhdsiRTools")) { - writeLines("Installing OhdsiRTools for multi-threading support") - devtools::install_github("OHDSI/OhdsiRTools") - } + message("Beginning multi-threaded operations") } # Create analysis table ------------------------------------------------------------- @@ -413,7 +423,7 @@ achilles <- function (connectionDetails, if (numThreads > 1 & !sqlOnly) { # Drop the scratch tables - writeLines(sprintf("Dropping scratch Achilles tables from schema %s", scratchDatabaseSchema)) + message(sprintf("Dropping scratch Achilles tables from schema %s", scratchDatabaseSchema)) dropAllScratchTables(connectionDetails = connectionDetails, scratchDatabaseSchema = scratchDatabaseSchema, @@ -421,7 +431,7 @@ achilles <- function (connectionDetails, numThreads = numThreads, tableTypes = c("achilles", "concept_hierarchy")) - writeLines(sprintf("Temporary Achilles tables removed from schema %s", scratchDatabaseSchema)) + message(sprintf("Temporary Achilles tables removed from schema %s", scratchDatabaseSchema)) } # Generate Main Analyses 
---------------------------------------------------------------------------------------------------------------- @@ -450,7 +460,7 @@ achilles <- function (connectionDetails, achillesSql <- c(achillesSql, lapply(mainSqls, function(s) s$sql)) if (!sqlOnly) { - writeLines("Executing multiple queries. This could take a while") + message("Executing multiple queries. This could take a while") if (numThreads == 1) { for (mainSql in mainSqls) { @@ -503,7 +513,7 @@ achilles <- function (connectionDetails, if (!sqlOnly) { - writeLines("Merging scratch Achilles tables") + message("Merging scratch Achilles tables") if (numThreads == 1) { for (sql in mergeSqls) { @@ -523,7 +533,7 @@ achilles <- function (connectionDetails, } if (!sqlOnly) { - writeLines(sprintf("Done. Achilles results can now be found in schema %s", resultsDatabaseSchema)) + message(sprintf("Done. Achilles results can now be found in schema %s", resultsDatabaseSchema)) } # Clean up scratch tables ----------------------------------------------- @@ -533,7 +543,7 @@ achilles <- function (connectionDetails, DatabaseConnector::disconnect(connection = connection) } else if (dropScratchTables & !sqlOnly) { # Drop the scratch tables - writeLines(sprintf("Dropping scratch Achilles tables from schema %s", scratchDatabaseSchema)) + message(sprintf("Dropping scratch Achilles tables from schema %s", scratchDatabaseSchema)) dropAllScratchTables(connectionDetails = connectionDetails, scratchDatabaseSchema = scratchDatabaseSchema, @@ -541,7 +551,7 @@ achilles <- function (connectionDetails, numThreads = numThreads, tableTypes = c("achilles")) - writeLines(sprintf("Temporary Achilles tables removed from schema %s", scratchDatabaseSchema)) + message(sprintf("Temporary Achilles tables removed from schema %s", scratchDatabaseSchema)) } # Create concept hierarchy table ----------------------------------------------------------------- @@ -605,10 +615,10 @@ achilles <- function (connectionDetails, if (sqlOnly) { 
SqlRender::writeSql(sql = paste(achillesSql, collapse = "\n\n"), targetFile = file.path(outputFolder, "achilles.sql")) - writeLines(sprintf("All Achilles SQL scripts can be found in folder: %s", file.path(outputFolder, "achilles.sql"))) + message(sprintf("All Achilles SQL scripts can be found in folder: %s", file.path(outputFolder, "achilles.sql"))) } - return (achillesResults) + achillesResults } #' Create the concept hierarchy @@ -678,7 +688,7 @@ createConceptHierarchy <- function(connectionDetails, if (!sqlOnly) { - writeLines("Executing Concept Hierarchy creation. This could take a while") + message("Executing Concept Hierarchy creation. This could take a while") if (numThreads == 1) { connection <- DatabaseConnector::connect(connectionDetails = connectionDetails) @@ -709,10 +719,10 @@ createConceptHierarchy <- function(connectionDetails, numThreads = numThreads, tableTypes = c("concept_hierarchy")) - writeLines(sprintf("Done. Concept Hierarchy table can now be found in %s", resultsDatabaseSchema)) + message(sprintf("Done. 
Concept Hierarchy table can now be found in %s", resultsDatabaseSchema)) } - return (c(hierarchySqls, mergeSql)) + c(hierarchySqls, mergeSql) } @@ -746,7 +756,7 @@ createIndices <- function(connectionDetails, DatabaseConnector::disconnect(connection = connection) } - return (indicesSql) + indicesSql } @@ -797,11 +807,11 @@ validateSchema <- function(connectionDetails, } else { connection <- DatabaseConnector::connect(connectionDetails = connectionDetails) tables <- DatabaseConnector::querySql(connection = connection, sql = sql) - writeLines("CDM Schema is valid") + message("CDM Schema is valid") DatabaseConnector::disconnect(connection = connection) } - return (sql) + sql } #' Get all analysis details @@ -814,9 +824,14 @@ validateSchema <- function(connectionDetails, #' #' @export getAnalysisDetails <- function() { - pathToCsv <- system.file("csv", "achilles", "achilles_analysis_details.csv", package = "Achilles") - analysisDetails <- read.csv(file = pathToCsv, header = TRUE, stringsAsFactors = FALSE) - return (analysisDetails) + read.csv( # Recommend saving as an rda file in ./data/ + system.file( + "csv", + "achilles", + "achilles_analysis_details.csv", + package = "Achilles"), + stringsAsFactors = FALSE + ) } #' Drop all possible scratch tables @@ -947,15 +962,14 @@ dropAllScratchTables <- function(connectionDetails, cdmDatabaseSchema = cdmDatabaseSchema)$sql connection <- DatabaseConnector::connect(connectionDetails = connectionDetails) cdmVersion <- tryCatch({ - c <- DatabaseConnector::querySql(connection = connection, sql = sql) - }, error = function (e) { - c <- "" + DatabaseConnector::querySql(connection = connection, sql = sql) + }, error = function(e) { + "" }, finally = { DatabaseConnector::disconnect(connection = connection) - connection <- NULL }) - return (c) + cdmVersion } .getAnalysisSql <- function(analysisId, @@ -1043,9 +1057,3 @@ dropAllScratchTables <- function(connectionDetails, saveRDS(object = newDf, file = logFile) } - -.is_installed <- 
function(pkg, version = 0) { - installed_version <- tryCatch(utils::packageVersion(pkg), error = function(e) NA) - !is.na(installed_version) && installed_version >= version -} - \ No newline at end of file diff --git a/R/AchillesHeel.R b/R/AchillesHeel.R index dd2fa1be..96de782f 100755 --- a/R/AchillesHeel.R +++ b/R/AchillesHeel.R @@ -8,7 +8,7 @@ # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# https://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, @@ -104,27 +104,37 @@ achillesHeel <- function(connectionDetails, schemaDelim <- "." if (numThreads == 1 || scratchDatabaseSchema == "#") { + message("Beginning single-threaded operations") + numThreads <- 1 scratchDatabaseSchema <- "#" schemaDelim <- "s_" # first invocation of the connection, to persist throughout to maintain temp tables connection <- DatabaseConnector::connect(connectionDetails = connectionDetails) + } else if (!requireNamespace("OhdsiRTools", quietly = TRUE)) { + stop( + "Multi-threading support requires package 'OhdsiRTools'.", + " Consider running single-threaded by setting", + " `numThreads = 1` and `scratchDatabaseSchema = '#'`.", + " You may install it using devtools with the following code:", + "\n devtools::install_github('OHDSI/OhdsiRTools')", + "\n\nAlternately, you might want to install ALL suggested packages using:", + "\n devtools::install_github('OHDSI/Achilles', dependencies = TRUE)", + call. = FALSE + ) } else { - if (!.is_installed("OhdsiRTools")) { - writeLines("Installing OhdsiRTools for multi-threading support") - devtools::install_github("OHDSI/OhdsiRTools") - } + message("Beginning multi-threaded operations") } if (!sqlOnly) { - writeLines("Executing Achilles Heel. This could take a while") + message("Executing Achilles Heel. 
This could take a while") } # Clean up existing scratch tables ----------------------------------------------- - if (numThreads > 1 & !sqlOnly) { + if (numThreads > 1 && !sqlOnly) { # Drop the scratch tables - writeLines(sprintf("Dropping scratch Heel tables from schema %s", scratchDatabaseSchema)) + message(sprintf("Dropping scratch Heel tables from schema %s", scratchDatabaseSchema)) dropAllScratchTables(connectionDetails = connectionDetails, scratchDatabaseSchema = scratchDatabaseSchema, @@ -132,7 +142,7 @@ achillesHeel <- function(connectionDetails, numThreads = numThreads, tableTypes = c("heel")) - writeLines(sprintf("Temporary Heel tables removed from schema %s", scratchDatabaseSchema)) + message(sprintf("Temporary Heel tables removed from schema %s", scratchDatabaseSchema)) } # Generate parallel Heels --------------------------------------------------------------------------------------------------------- @@ -338,7 +348,7 @@ achillesHeel <- function(connectionDetails, if (numThreads > 1 & !sqlOnly) { # Drop the scratch tables - writeLines(sprintf("Dropping scratch Heel tables from schema %s", scratchDatabaseSchema)) + message(sprintf("Dropping scratch Heel tables from schema %s", scratchDatabaseSchema)) dropAllScratchTables(connectionDetails = connectionDetails, scratchDatabaseSchema = scratchDatabaseSchema, @@ -346,19 +356,19 @@ achillesHeel <- function(connectionDetails, numThreads = numThreads, tableTypes = c("heel")) - writeLines(sprintf("Temporary Heel tables removed from schema %s", scratchDatabaseSchema)) + message(sprintf("Temporary Heel tables removed from schema %s", scratchDatabaseSchema)) } heelSql <- paste(heelSql, collapse = "\n\n") if (sqlOnly) { SqlRender::writeSql(sql = heelSql, targetFile = file.path(outputFolder, "achillesHeel.sql")) - writeLines(sprintf("All Achilles SQL scripts can be found in folder: %s", file.path(outputFolder, "achillesHeel.sql"))) + message(sprintf("All Achilles SQL scripts can be found in folder: %s", 
file.path(outputFolder, "achillesHeel.sql"))) } else { - writeLines(sprintf("Done. Achilles Heel results can now be found in %s", resultsDatabaseSchema)) + message(sprintf("Done. Achilles Heel results can now be found in %s", resultsDatabaseSchema)) } - return (heelSql) + heelSql } .getHeelSql <- function(heelFile, @@ -387,5 +397,5 @@ achillesHeel <- function(connectionDetails, tempHeelPrefix = tempHeelPrefix, heelName = gsub(pattern = ".sql", replacement = "", x = basename(heelFile))) - return (sql) + sql } diff --git a/R/AchillesViewResults.R b/R/AchillesViewResults.R index c3b8481f..675439df 100755 --- a/R/AchillesViewResults.R +++ b/R/AchillesViewResults.R @@ -18,8 +18,8 @@ #' fetchAchillesHeelResults(connectionDetails, "scratch") #' } #' @export -fetchAchillesHeelResults <- function (connectionDetails, - resultsDatabaseSchema) { +fetchAchillesHeelResults <- function(connectionDetails, + resultsDatabaseSchema) { connection <- DatabaseConnector::connect(connectionDetails = connectionDetails) sql <- SqlRender::renderSql(sql = "SELECT * FROM @resultsDatabaseSchema.achilles_heel_results", resultsDatabaseSchema = resultsDatabaseSchema)$sql @@ -27,7 +27,7 @@ fetchAchillesHeelResults <- function (connectionDetails, issues <- DatabaseConnector::querySql(connection = connection, sql = sql) DatabaseConnector::disconnect(connection = connection) - return (issues) + issues } #' @title fetchAchillesAnalysisResults @@ -94,5 +94,6 @@ fetchAchillesAnalysisResults <- function (connectionDetails, analysisResults = analysisResults) class(result) <- "achillesAnalysisResults" - return (result) + + result } diff --git a/R/addDatasource.R b/R/addDatasource.R index c71dea72..172fc385 100755 --- a/R/addDatasource.R +++ b/R/addDatasource.R @@ -8,7 +8,7 @@ # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# https://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, diff --git a/R/exportToJson.R b/R/exportToJson.R index de1522ae..24e6a716 100755 --- a/R/exportToJson.R +++ b/R/exportToJson.R @@ -8,7 +8,7 @@ # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# https://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, @@ -27,7 +27,7 @@ getAllReports <- function() { allReports <- read.csv(file = system.file("csv", "export", "all_reports.csv", package = "Achilles"), stringsAsFactors = FALSE, header = TRUE)$REPORT - return (allReports) + allReports } initOutputPath <- function (outputPath){ diff --git a/README.md b/README.md index eee9f47b..5dd41618 100755 --- a/README.md +++ b/README.md @@ -1,154 +1,174 @@ Achilles ======== + +[![Build Status](https://travis-ci.org/OHDSI/Achilles.svg?branch=master)](https://travis-ci.org/OHDSI/Achilles) +[![codecov.io](https://codecov.io/github/OHDSI/Achilles/coverage.svg?branch=master)](https://codecov.io/github/OHDSI/Achilles?branch=master) -Automated Characterization of Health Information at Large-scale Longitudinal Evidence Systems (ACHILLES) - descriptive statistics and data quality checks on an OMOP CDM v5 databases +**A**utomated **C**haracterization of **H**ealth **I**nformation at **L**arge-scale **L**ongitudinal **E**vidence **S**ystems (ACHILLES)---descriptive statistics and data quality checks on OMOP CDM v5 databases Achilles consists of several parts: -1. Precomputations (for database characterization) -2. Achilles Heel for data quality -3. 
Export feature for AchillesWeb (or, Atlas Data Sources can read the Achilles tables directly) -4. Index generation for better performance with Atlas Data Sources - -Achilles is actively being developed for CDM v5.x only. - -Getting Started -=============== -(Please review the [Achilles Wiki](https://github.com/OHDSI/Achilles/wiki/Additional-instructions-for-Linux) for specific details for Linux) - -1. Make sure you have your data in the OMOP CDM v5.x format (https://github.com/OHDSI/CommonDataModel). - -2. Make sure that you have Java installed. If you don't have Java already installed on your computer (on most computers it already is installed), go to [java.com](http://java.com) to get the latest version. (If you have trouble building with rJava below, be sure on Windows that your Path variable includes the path to jvm.dll: - -```Windows Button and R --> type "sysdm.cpl" --> Advanced tab --> Environmental Variables button --> Edit PATH variable, and then add to the end your Java Path (e.g. ';C:/Program Files/Java/jre/bin/server')``` - -3. In R, use the following commands to install Achilles (if you have prior package installations of any of these packages, you may need to first uninstall them using the command remove.packages()). - - ```r - install.packages("devtools") - library(devtools) - install_github("OHDSI/SqlRender") - install_github("OHDSI/DatabaseConnector") - install_github("OHDSI/Achilles") - #devtools::install_github("OHDSI/Achilles@*release") #to install latest release (if master contains a bug for you) - #install_github("OHDSI/Achilles", args="--no-multiarch") #to avoid Java 32 vs 64 issues - ``` - -4. To run the Achilles analysis, first determine if you'd like to run the function in multi-threaded mode or in single-threaded mode. Use 'runCostAnalysis = FALSE' to save on execution time, as cost analyses tend to run long. 
- -**In multi-threaded mode** - -The analyses are run in multiple SQL sessions, which can be set using the 'numThreads' setting and setting scratchDatabaseSchema to something other than '#'. For example, 10 threads means 10 independent SQL sessions. Intermediate results are written to scratch tables before finally being combined into the final results tables. Scratch tables are permanent tables; you can either choose to have Achilles drop these tables ('dropScratchTables = TRUE') or you can drop them at a later time ('dropScratchTables = FALSE'). Dropping the scratch tables can add time to the full execution. If desired, you can set your own custom prefix for all Achilles analysis scratch tables (tempAchillesPrefix) and/or for all Achilles Heel scratch tables (tempHeelPrefix). - -**In single-threaded mode** - -The analyses are run in one SQL session and all intermediate results are written to temp tables before finally being combined into the final results tables. Temp tables are dropped once the package is finished running. Single-threaded mode can be invoked by either setting 'numThreads = 1' or 'scratchDatabaseSchema = #'. 
- - -use the following commands in R: - ```r - library(Achilles) - connectionDetails <- createConnectionDetails( - dbms="redshift", - server="server.com", - user="secret", - password='secret', - port="5439") - ``` - - **Single-threaded mode** - - ```r - achilles(connectionDetails, - cdmDatabaseSchema = "cdm5_inst", - resultsDatabaseSchema="results", - vocabDatabaseSchema = "vocab", - numThreads = 1, - sourceName = "My Source Name", - cdmVersion = "5.3.0", - runHeel = TRUE, - runCostAnalysis = TRUE) - ``` - **Multi-threaded mode** - - ```r - achilles(connectionDetails, - cdmDatabaseSchema = "cdm5_inst", - resultsDatabaseSchema = "results", - scratchDatabaseSchema = "scratch", - vocabDatabaseSchema = "vocab", - numThreads = 10, - sourceName = "My Source Name", - cdmVersion = "5.3.0", - runHeel = TRUE, - runCostAnalysis = TRUE) - ``` - -The "cdm5_inst" cdmDatabaseSchema parameter, "results" resultsDatabaseSchema parameter, and "scratch" scratchDatabaseSchema parameter are the fully qualified names of the schemas holding the CDM data, targeted for result writing, and holding the intermediate scratch tables, respectively. See the [DatabaseConnector](https://github.com/OHDSI/DatabaseConnector) package for details on settings the connection details for your database, for example by typing - - ```r - ?createConnectionDetails - ``` - -Execution of all Achilles pre-computations may take a long time, particularly in single-threaded mode and with COST analyses enabled. See notes.md file to find out how some analyses can be excluded to make the execution faster (excluding cost pre-computations) - -Currently "sql server", "pdw", "oracle", "postgresql", "redshift", "mysql", "impala", and "bigquery" are supported as dbms. "cdmVersion" can be *ONLY* 5.x (please look at prior commit history for v4 support). - -5. 
To use [AchillesWeb](https://github.com/OHDSI/AchillesWeb) to explore the Achilles statistics, you must first export the statistics to a folder JSON files, which can optionally be compressed into one gzipped file for easier transportability. - ```r - exportToJson(connectionDetails, - cdmDatabaseSchema = "cdm5_inst", - resultsDatabaseSchema = "results", - outputPath = "c:/myPath/AchillesExport", - cdmVersion = "5.3.0", - compressIntoOneFile = TRUE # creates gzipped file of all JSON files) - ``` - -6. To run only Achilles Heel (component of Achilles), use the following command: - ```r - achillesHeel(connectionDetails, - cdmDatabaseSchema = "cdm5_inst", - resultsDatabaseSchema = "results", - scratchDatabaseSchema = "scratch", - numThreads = 10, # multi-threaded mode - cdmVersion = "5.3.0") - ``` +1. Precomputations (for database characterization) -7. Possible optional additional steps: +1. Achilles Heel for data quality - - To see what errors were found (from within R), run `fetchAchillesHeelResults(connectionDetails,resultsDatabaseSchema)` +1. Export feature for AchillesWeb (or, Atlas Data Sources can read the Achilles tables directly) - - To see a particular analysis, run `fetchAchillesAnalysisResults(connectionDetails,resultsDatabaseSchema,analysisId = 2)` +1. Index generation for better performance with Atlas Data Sources - - To join data tables with some lookup (overview files), obtains those using commands below: +Achilles is actively being developed for CDM v5.x only. - - To get description of analyses, run `getAnalysisDetails()`. +## Getting Started - - To get description of derived measures, run `read.csv(system.file("csv","derived_analysis_details",package="Achilles"),as.is=T)` +(Please review the [Achilles Wiki](https://github.com/OHDSI/Achilles/wiki/Additional-instructions-for-Linux) for specific details for Linux) - - Similarly, for overview of rules, run +1. 
Make sure you have your data in the OMOP CDM v5.x format + (https://github.com/OHDSI/CommonDataModel). + +1. This package makes use of rJava. Make sure that you have Java installed. If you don't have Java already installed on your computer (on most computers it already is installed), go to [java.com](https://java.com) to get the latest version. If you are having trouble with rJava, [this Stack Overflow post](https://stackoverflow.com/questions/7019912/using-the-rjava-package-on-win7-64-bit-with-r) may assist you when you begin troubleshooting. + + +1. In R, use the following commands to install Achilles. + + ```r + if (!requireNamespace("devtools", quietly = TRUE)) install.packages("devtools") + + # To install the master branch + devtools::install_github("OHDSI/Achilles") + + # To install latest release (if master branch contains a bug for you) + # devtools::install_github("OHDSI/Achilles@*release") + + # To avoid Java 32 vs 64 issues + # devtools::install_github("OHDSI/Achilles", args="--no-multiarch") + ``` + +1. To run the Achilles analysis, first determine if you'd like to run the function in multi-threaded mode or in single-threaded mode. Use `runCostAnalysis = FALSE` to save on execution time, as cost analyses tend to run long. + + **In multi-threaded mode** + + The analyses are run in multiple SQL sessions, which can be set using the `numThreads` setting and setting scratchDatabaseSchema to something other than `#`. For example, 10 threads means 10 independent SQL sessions. Intermediate results are written to scratch tables before finally being combined into the final results tables. Scratch tables are permanent tables; you can either choose to have Achilles drop these tables (`dropScratchTables = TRUE`) or you can drop them at a later time (`dropScratchTables = FALSE`). Dropping the scratch tables can add time to the full execution. 
If desired, you can set your own custom prefix for all Achilles analysis scratch tables (tempAchillesPrefix) and/or for all Achilles Heel scratch tables (tempHeelPrefix). + + **In single-threaded mode** + + The analyses are run in one SQL session and all intermediate results are written to temp tables before finally being combined into the final results tables. Temp tables are dropped once the package is finished running. Single-threaded mode can be invoked by either setting `numThreads = 1` or `scratchDatabaseSchema = "#"`. + + Use the following commands in R: -```read.csv(system.file("csv","achilles_rule.csv",package="Achilles"),as.is=T)``` - + ```r + library(Achilles) + connectionDetails <- createConnectionDetails( + dbms="redshift", + server="server.com", + user="secret", + password='secret', + port="5439") + ``` + + **Single-threaded mode** + + ```r + achilles(connectionDetails, + cdmDatabaseSchema = "cdm5_inst", + resultsDatabaseSchema="results", + vocabDatabaseSchema = "vocab", + numThreads = 1, + sourceName = "My Source Name", + cdmVersion = "5.3.0", + runHeel = TRUE, + runCostAnalysis = TRUE) + ``` + + **Multi-threaded mode** + + ```r + achilles(connectionDetails, + cdmDatabaseSchema = "cdm5_inst", + resultsDatabaseSchema = "results", + scratchDatabaseSchema = "scratch", + vocabDatabaseSchema = "vocab", + numThreads = 10, + sourceName = "My Source Name", + cdmVersion = "5.3.0", + runHeel = TRUE, + runCostAnalysis = TRUE) + ``` + + The `"cdm5_inst"` cdmDatabaseSchema parameter, `"results"` resultsDatabaseSchema parameter, and `"scratch"` scratchDatabaseSchema parameter are the fully qualified names of the schemas holding the CDM data, targeted for result writing, and holding the intermediate scratch tables, respectively. 
See the [DatabaseConnector](https://github.com/OHDSI/DatabaseConnector) package for details on setting the connection details for your database, for example by typing + + ```r + ?createConnectionDetails + ``` + + Execution of all Achilles pre-computations may take a long time, particularly in single-threaded mode and with COST analyses enabled. See the [notes.md](extras/notes.md) file to find out how some analyses can be excluded to make the execution faster (excluding cost pre-computations). + + Currently `"sql server"`, `"pdw"`, `"oracle"`, `"postgresql"`, `"redshift"`, `"mysql"`, `"impala"`, and `"bigquery"` are supported as `dbms`. `cdmVersion` can be *ONLY* 5.x (please look at prior commit history for v4 support). + +1. To use [AchillesWeb](https://github.com/OHDSI/AchillesWeb) to explore the Achilles statistics, you must first export the statistics to a folder of JSON files, which can optionally be compressed into one gzipped file for easier transportability. + + ```r + exportToJson(connectionDetails, + cdmDatabaseSchema = "cdm5_inst", + resultsDatabaseSchema = "results", + outputPath = "c:/myPath/AchillesExport", + cdmVersion = "5.3.0", + compressIntoOneFile = TRUE) # creates gzipped file of all JSON files + ``` + +1. To run only Achilles Heel (component of Achilles), use the following command: + + ```r + achillesHeel(connectionDetails, + cdmDatabaseSchema = "cdm5_inst", + resultsDatabaseSchema = "results", + scratchDatabaseSchema = "scratch", + numThreads = 10, # multi-threaded mode + cdmVersion = "5.3.0") + ``` + +1. Possible optional additional steps: + + - To see what errors were found (from within R), run: + ```r + fetchAchillesHeelResults(connectionDetails, resultsDatabaseSchema) + ``` + - To see a particular analysis, run: + ```r + fetchAchillesAnalysisResults(connectionDetails, resultsDatabaseSchema, analysisId = 2) + ``` + - To join data tables with some lookup (overview files), obtain those using commands below: + - To get description of analyses, run `getAnalysisDetails()`. 
+ - To get description of derived measures, run: + ```r + read.csv( + system.file("csv", "derived_analysis_details", package = "Achilles"), + as.is = TRUE + ) + ``` + - Similarly, for overview of rules, run: + ```r + read.csv( + system.file("csv", "achilles_rule.csv", package = "Achilles"), + as.is = TRUE + ) + ``` - Also see [notes.md](extras/notes.md) for more information (in the extras folder). -Developers: How to Add or Modify Analyses -================================== +## Developers: How to Add or Modify Analyses Please refer to the [README-developers.md file](README-developers.md). +## Getting Started with Docker -Getting Started with Docker -=========================== This is an alternative method for running Achilles that does not require R and Java installations, using a Docker container instead. 1. Install [Docker](https://docs.docker.com/installation/) and [Docker Compose](https://docs.docker.com/compose/install/). -2. Clone this repository with git (`git clone https://github.com/OHDSI/Achilles.git`) and make it your working directory (`cd Achilles`). +1. Clone this repository with git (`git clone https://github.com/OHDSI/Achilles.git`) and make it your working directory (`cd Achilles`). -3. Copy `env_vars.sample` to `env_vars` and fill in the variable definitions. The `ACHILLES_DB_URI` should be formatted as `://:@/`. +3. Copy *`env_vars.sample`* to *`env_vars`* and fill in the variable definitions. The `ACHILLES_DB_URI` should be formatted as `://:@/`. -4. Copy `docker-compose.yml.sample` to `docker-compose.yml` and fill in the data output directory. +4. Copy *`docker-compose.yml.sample`* to *`docker-compose.yml`* and fill in the data output directory. 5. Build the docker image with `docker-compose build`. 
@@ -170,25 +190,25 @@ docker run \ ``` -License -======= +## License + Achilles is licensed under Apache License 2.0 -# Pre-computations +## Pre-computations Achilles has some compatibility with Data Quality initiatives of the Data Quality Collaborative (DQC; http://repository.edm-forum.org/dqc or GitHub https://github.com/orgs/DQCollaborative). For example, a harmonized set of data quality terms has been published by Kahn et al. in 2016. What Achilles calls an *analysis* (a pre-computation for a given dataset), DQC would call a *measure*. -Some Heel Rules take advantage of derived measures, a feature of Heel introduced in version 1.4. A *derived measure* is a result of an SQL query that takes Achilles analyses as input. It is simply a different view of the precomputations that is advantageous to materialize. The logic for computing a derived measure can be viewed in the Heel SQL files in `/inst/sql/sql_server/heels`, which are described further in the [Developers README file](README-developers.md). +Some Heel Rules take advantage of derived measures, a feature of Heel introduced in version 1.4. A *derived measure* is a result of an SQL query that takes Achilles analyses as input. It is simply a different view of the precomputations that is advantageous to materialize. The logic for computing a derived measure can be viewed in the Heel SQL files in *`/inst/sql/sql_server/heels`*, which are described further in the [Developers README file](README-developers.md). Overview of derived measures can be seen in [CSV file here](inst/csv/heel/heel_results_derived_details.csv). For possible future flexible setting of Achilles Heel rule thresholds, some Heel rules are split into a two-phase approach. First, a derived measure is computed and the result is stored in a separate table `ACHILLES_RESULTS_DERIVED`. A Heel rule logic is then made simpler by a simple comparison whether a derived measure is over a threshold.
A link between which rules use which pre-computation is available in [CSV file here](inst/csv/heel/heel_rules_all.csv) (previously was in `inst/csv/achilles_rule.csv`) (see column `linked_measure`). -# Heel Rules +## Heel Rules Rules are classified into `CDM conformance` rules and `DQ` rules - see column `rule_type` in the [CSV file here](inst/csv/heel/heel_rules_all.csv). @@ -200,15 +220,5 @@ Rules that have in their name a prefix `[GeneralPopulationOnly]` are applicable Rules are classified into: error, warning and notification (see column `severity`). -Development -=========== -Achilles is being developed in RStudio. - -### Development status -[![Build Status](https://travis-ci.org/OHDSI/Achilles.svg?branch=master)](https://travis-ci.org/OHDSI/Achilles) -[![codecov.io](https://codecov.io/github/OHDSI/Achilles/coverage.svg?branch=master)](https://codecov.io/github/OHDSI/Achilles?branch=master) - - - -# Acknowledgements +## Acknowledgements - This project is supported in part through the National Science Foundation grant IIS 1251151. 
diff --git a/man/Achilles.Rd b/man/Achilles.Rd deleted file mode 100755 index b11b70b8..00000000 --- a/man/Achilles.Rd +++ /dev/null @@ -1,10 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Achilles-package.R -\docType{package} -\name{Achilles} -\alias{Achilles} -\alias{Achilles-package} -\title{Achilles} -\description{ -Achilles -} diff --git a/man/achilles.Rd b/man/achilles.Rd old mode 100755 new mode 100644 index 1cf0d569..9cd86331 --- a/man/achilles.Rd +++ b/man/achilles.Rd @@ -5,7 +5,6 @@ \title{The main Achilles analyses (for v5.x)} \usage{ achilles(connectionDetails, cdmDatabaseSchema, - oracleTempSchema = cdmDatabaseSchema, resultsDatabaseSchema = cdmDatabaseSchema, scratchDatabaseSchema = resultsDatabaseSchema, vocabDatabaseSchema = cdmDatabaseSchema, sourceName = "", diff --git a/man/allReports.Rd b/man/allReports.Rd deleted file mode 100755 index b0f9f2a4..00000000 --- a/man/allReports.Rd +++ /dev/null @@ -1,10 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Achilles-package.R -\docType{data} -\name{allReports} -\alias{allReports} -\title{allReports} -\description{ -A list of reports -} -\keyword{datasets} diff --git a/man/dropAllScratchTables.Rd b/man/dropAllScratchTables.Rd index cec31606..ddeae406 100755 --- a/man/dropAllScratchTables.Rd +++ b/man/dropAllScratchTables.Rd @@ -6,8 +6,7 @@ \usage{ dropAllScratchTables(connectionDetails, scratchDatabaseSchema, tempAchillesPrefix = "tmpach", tempHeelPrefix = "tmpheel", - numThreads = 1, tableTypes = c("achilles", "heel", - "concept_hierarchy")) + numThreads = 1, tableTypes = c("achilles", "heel", "concept_hierarchy")) } \arguments{ \item{connectionDetails}{An R object of type \code{connectionDetails} created using the function \code{createConnectionDetails} in the \code{DatabaseConnector} package.}