Skip to content

Commit

Permalink
Merge pull request #16 from maize-genetics/fix-na
Browse files Browse the repository at this point in the history
Improve input checks
  • Loading branch information
btmonier authored Oct 25, 2024
2 parents 7218d44 + 18e86b3 commit 435f6c2
Show file tree
Hide file tree
Showing 12 changed files with 114 additions and 38 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: rPHG2
Title: R interface to PHGv2
Version: 0.7
Version: 0.7.1
Authors@R:
person(
given = "Brandon",
Expand Down
9 changes: 9 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
## rPHG2 0.8
* Fixed missing IDs returned in `readHapIds()` method calls.
+ missing values (i.e., `"null"`) now return as `NA`s in the matrix object
* Removed `rlang::abort()` condition in `initPhg()`
+ Now returns "warning" message instead of low-level `stop()` procedure
* Modified the `PHGLocalCon()` constructor to now take in either files or a
directory path


## rPHG2 0.7
* Added new parameter to `plotHaploCounts()`:
+ `geom`
Expand Down
40 changes: 35 additions & 5 deletions R/class_phg_con_local.R
Original file line number Diff line number Diff line change
Expand Up @@ -54,18 +54,47 @@ setValidity("PHGLocalCon", function(object) {
#' DB data for a given set of PHG-related methods.
#'
#' @param hVcfFiles
#' A list of \href{https://github.com/maize-genetics/phg_v2/blob/main/docs/hvcf_specifications.md}{hVCF}
#' files as a \code{character} vector
#' A path to a directory or file containing valid \href{https://github.com/maize-genetics/phg_v2/blob/main/docs/hvcf_specifications.md}{hVCF}
#' files (ending in either \code{.h.vcf} or \code{.h.vcf.gz}) as a
#' \code{character} vector
#'
#' @export
PHGLocalCon <- function(hVcfFiles) {
# This is probably overkill right now, but will revisit once the TileDB
# C API is better integrated in PHGv2...
# TODO - This is probably overkill right now, but will revisit once the TileDB
# C API is better integrated in PHGv2...

# Initial type checking
if (!is.vector(hVcfFiles) || !is.character(hVcfFiles)) {
rlang::abort("Input is not a valid 'character' vector")
}

# Check if directory or files exist
procFiles <- NULL
hvcfExtPattern <- "\\.h\\.vcf(\\.gz)?$"
if (any(dir.exists(hVcfFiles))) {
# Get all files ending with .h.vcf or .h.vcf.gz in the directory
procFiles <- list.files(
path = hVcfFiles,
pattern = hvcfExtPattern,
full.names = TRUE
)

if (length(procFiles) == 0) {
rlang::abort("No files ending with .h.vcf or .h.vcf.gz found in provided directory")
}
} else if (any(file.exists(hVcfFiles))) {
if (any(grepl(hvcfExtPattern, hVcfFiles))) {
procFiles <- hVcfFiles[grepl(hvcfExtPattern, hVcfFiles)]
}
} else {
rlang::abort("The input path is neither a valid directory nor a valid file")
}

methods::new(
Class = "PHGLocalCon",
phgType = "local",
host = NA_character_, # making this NA for now
hVcfFiles = normalizePath(hVcfFiles)
hVcfFiles = normalizePath(procFiles)
)
}

Expand Down Expand Up @@ -93,6 +122,7 @@ setMethod(
present <- cli::col_green(cli::symbol$square_small_filled)
absent <- cli::col_grey(cli::symbol$square_small)

#' TODO - implement with direct calls to TileDB
dbUriStatus <- ifelse(is.na(object@host), absent, present)

if (any(is.na(object@hVcfFiles))) {
Expand Down
2 changes: 1 addition & 1 deletion R/class_phg_dataset.R
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ setMethod(

uniqHaps <- data.frame(
rr_id = colnames(hapIds),
n_haplo = apply(hapIds, 2, function(it) length(unique(it[it != "null"])))
n_haplo = apply(hapIds, 2, function(it) length(unique(it[!is.na(it)])))
)

return(tibble::as_tibble(
Expand Down
45 changes: 24 additions & 21 deletions R/init_phg.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,15 @@
#'
#' @export
initPhg <- function(phgPath, check = TRUE, verbose = TRUE) {
# Set up CLI message components
# Note: I **know** there are easier ways of accomplishing this but I
# need to do this 'ad-hoc' for JupyterHub notebooks!
cliSuccess <- cli::col_green(cli::symbol$tick)
cliInform <- cli::col_blue(cli::symbol$info)
cliNote <- cli::col_red(cli::symbol$warning)
cliWarn <- cli::col_red(cli::symbol$cross)
cliBullet <- cli::symbol$bullet

if (!is.character(phgPath) || !dir.exists(phgPath)) {
rlang::abort("PHG library path ('phgPath') provided does not exist")
}
Expand All @@ -22,33 +31,27 @@ initPhg <- function(phgPath, check = TRUE, verbose = TRUE) {
rlang::abort("Cannot find 'phg_v2.jar' in library path")
}

# Check if JVM is already initialized - if it is, show warning message and
# move on
loadMsg <- paste0(cliSuccess, " PHG JARs added to class path")
if (isJvmInitialized() && "phg_v2.jar" %in% basename(rJava::.jclassPath())) {
rlang::abort("PHGv2 JARs already added to class path")
loadMsg <- paste0(cliWarn, " PHGv2 JARs already added to class path - skipping...")
} else {
tryCatch(
{
rJava::.jinit()
rJava::.jaddClassPath(dir(phgPath, full.names = TRUE))
},
error = function(e) {
rlang::abort("Java initialization or class path addition failed", parent = e)
}
)
}

tryCatch(
{
rJava::.jinit()
rJava::.jaddClassPath(dir(phgPath, full.names = TRUE))
},
error = function(e) {
rlang::abort("Java initialization or class path addition failed", parent = e)
}
)

# Set up CLI message components
# Note: I **know** there are easier ways of accomplishing this but I
# need to do this 'ad-hoc' for JupyterHub notebooks!
cliSuccess <- cli::col_green(cli::symbol$tick)
cliInform <- cli::col_blue(cli::symbol$info)
cliWarn <- cli::col_red(cli::symbol$warning)
cliBullet <- cli::symbol$bullet

# Get version stats
jvmStats <- jvmStats()

# Get PHG version messages / set up load message
loadMsg <- paste0(cliSuccess, " PHG JARs added to class path")
currentPhgVersion <- phgVersion(jvmStats)
cliCurrent <- cli::style_bold(currentPhgVersion)
phgMsg <- paste0(" ", cliBullet, " PHG version....: ", cliCurrent)
Expand All @@ -75,7 +78,7 @@ initPhg <- function(phgPath, check = TRUE, verbose = TRUE) {
phgWarnMsg <- sprintf(
" %s\n %s %sCurrent version of PHGv2 (%s) is out of date (%s)\n %s consider updating (%s)",
cli::symbol$line,
cliWarn,
cliNote,
cli::style_bold(cli::symbol$sup_1),
cliCurrent,
cliLatest,
Expand Down
4 changes: 4 additions & 0 deletions R/read_hap_ids.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ hapIdsFromJvmGraph <- function(jvmGraph, nThreads) {
jm <- interface$getHapIdMatrixFromGraph(jvmGraph, nThreads)
rm <- jm |> kotlin2DArrayToRMatrix()

# Convert missing IDs to R NAs
missingValues <- c("", "null")
rm[rm %in% missingValues] <- NA

return(rm)
}

Expand Down
9 changes: 4 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
<!-- badges: end -->

`rPHG2` is a system to interact with and retrieve information from **version 2**
of the Practical Haplotype Graph (PHGv2) - a general, graph-based, computational
framework for genotype inference. This is accomplished by leveraging the
[Breeding](https://brapi.org/) and
of the [Practical Haplotype Graph (PHGv2)](https://phg.maizegenetics.net/) - a
general, graph-based, computational framework for genotype inference. This is
accomplished by leveraging the [Breeding](https://brapi.org/) and
[PHG](https://github.com/maize-genetics/phg_v2) APIs.

## Installation
Expand All @@ -30,8 +30,7 @@ initPhg("my/phg/jar/path")


# Create a connection
locCon <- list.files("my/hvcf/dir") |>
PHGLocalCon()
locCon <- PHGLocalCon("my/hvcf/dir")


# Build a graph
Expand Down
8 changes: 8 additions & 0 deletions man/PHGLocalCon-class.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions man/PHGLocalCon.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion tests/testthat/setup.R
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,9 @@ initPhg(phgLibPath)

## Test post-initialization ----
testthat::test_that("JVM init checker works", {
testthat::expect_error(rPHG2::initPhg(phgLibPath))
msg <- utils::capture.output(rPHG2::initPhg(phgLibPath))

testthat::expect_match(msg[1], "PHGv2 JARs already added to class path" )
})


15 changes: 13 additions & 2 deletions tests/testthat/test_class_phg_con_local.R
Original file line number Diff line number Diff line change
@@ -1,11 +1,22 @@
test_that("Local connection tests", {
test_that("General local connection tests", {
hVcfFileDir <- system.file("extdata", package = "rPHG2")
hVcfFiles <- list.files(hVcfFileDir, pattern = ".h.vcf$", full.names = TRUE)
locCon <- PHGLocalCon(hVcfFiles)

locConOutput <- utils::capture.output(locCon)
expect_error(PHGLocalCon(mtcars))
expect_error(PHGLocalCon(tempdir()))
expect_error(PHGLocalCon(tempfile()))

locConOutput <- utils::capture.output(locCon)
expect_equal(length(locConOutput), 3)

})


test_that("Local connection from directory", {
hVcfFileDir <- system.file("extdata", package = "rPHG2")
locCon <- PHGLocalCon(hVcfFileDir)
locConOutput <- utils::capture.output(locCon)
expect_equal(length(locConOutput), 3)

})
9 changes: 9 additions & 0 deletions vignettes/rPHG2.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,15 @@ localCon <- hVcfFiles |> PHGLocalCon()
localCon
```

Additionally, if you have a **directory** of valid hVCF files, you can simply
pass the directory path to the constructor. In the following example, I will
point to the `extdata` data directory _without_ pointing to singular hVCF files
that is found within the package:

```{r, eval=TRUE, echo=TRUE}
system.file("extdata", package = "rPHG2") |> PHGLocalCon()
```

From here, we can move to the [next section](#creating-jvm-objects) to create a `HaplotypeGraph`
object interface with the JVM.

Expand Down

0 comments on commit 435f6c2

Please sign in to comment.