From ea9f6fd19ec73300ef8296f340003fb433a08714 Mon Sep 17 00:00:00 2001 From: Sebastian Jentschke Date: Sun, 10 Nov 2024 23:38:11 +0100 Subject: [PATCH] Improved mkeBin (to accept character matrices too), and created the respective unit tests --- R/distances_omv.R | 10 +++++++--- tests/testthat/test-distances_omv.R | 13 ++++++++----- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/R/distances_omv.R b/R/distances_omv.R index a8817ce..0c97fd0 100644 --- a/R/distances_omv.R +++ b/R/distances_omv.R @@ -187,7 +187,7 @@ distances_omv <- function(dtaInp = NULL, fleOut = "", varDst = c(), clmDst = TRU } # convert to matrix, and transpose if necessary =============================================== - if (clmDst) dtaMtx <- as.matrix(dtaFrm) else dtaMtx <- t(as.matrix(dtaFrm)) + if (clmDst) dtaMtx <- as.matrix(dtaFrm[, varDst]) else dtaMtx <- t(as.matrix(dtaFrm[, varDst])) # standardize the data ======================================================================== if (grepl("^none$", stdDst)) { @@ -322,12 +322,16 @@ mkeBin <- function(m = NULL, p = 1, np = 0) { if (all(apply(m, 2, is.logical))) return(m) if (all(apply(m, 2, is.numeric))) { - r <- matrix(as.logical(m * NA), nrow = nrow(m), dimnames = dimnames(m)) + r <- matrix(as.logical(NA), nrow = nrow(m), ncol = ncol(m), dimnames = dimnames(m)) r[m == p] <- TRUE r[m == np] <- FALSE + } else if (all(apply(m, 2, function(c) is.character(c) && all(as.character(c(p, np)) %in% unique(c))))) { + r <- matrix(as.logical(NA), nrow = nrow(m), ncol = ncol(m), dimnames = dimnames(m)) + r[m == as.character(p)] <- TRUE + r[m == as.character(np)] <- FALSE } else { stop(paste("The input matrix for binary data either needs to be logical (then it will be kept as it is),", - "or numeric (where p and np are used to derive TRUE and FALSE).")) + "numeric or character (for the latter two, p and np are used to derive TRUE and FALSE).")) } r diff --git a/tests/testthat/test-distances_omv.R b/tests/testthat/test-distances_omv.R index df77af7..d79e9e5 100644 --- a/tests/testthat/test-distances_omv.R +++ b/tests/testthat/test-distances_omv.R @@ -805,10 +805,13 @@ test_that("distances_omv works", { # check mkeBin mt4Chk <- mkeBin(as.matrix(binFm5), 1, 2) - expect_equal(dim(mt4Chk), dim(frqFrm)) - expect_equal(unname(apply(mt4Chk, 2, class)), rep("logical", dim(binFm3)[2])) + expect_equal(dim(mt4Chk), dim(binFm5)) + expect_equal(unname(apply(mt4Chk, 2, class)), rep("logical", dim(binFm5)[2])) expect_equal(unname(apply(mt4Chk, 2, table)), unname(apply(binFm5, 2, table)[c(2, 1), ])) - + expect_equal(mt4Chk, mkeBin(as.matrix(as.data.frame(lapply(binFm5, as.factor))), 1, 2)) + expect_equal(binFrm, as.data.frame(mkeBin(apply(apply(as.matrix(binFrm), 2, as.integer), 2, as.factor)))) + expect_equal(binFrm, as.data.frame(mkeBin(apply(apply(as.matrix(binFrm), 2, as.integer), 2, as.factor), 1, 0))) + expect_equal(binFrm, as.data.frame(mkeBin(as.matrix(as.data.frame(lapply(binFrm, function(c) as.character(as.integer(!c) + 1)))), 1, 2))) # test cases for error messages =================================================================================== expect_error(distances_omv(fleInp = nmeInp, fleOut = nmeOut, varDst = names(cntFrm)), @@ -822,9 +825,9 @@ test_that("distances_omv works", { expect_error(distances_omv(dtaInp = cntFrm, fleOut = nmeOut, varDst = names(cntFrm), nmeDst = "wrong"), regexp = "^Invalid distance measure: \\w+\\. See Details in the help for further information\\.") expect_error(clcFrq(as.matrix(frqFrm), "wrong"), regexp = "clcFrq: Method wrong is not implemented\\.") - expect_error(mkeBin(apply(as.matrix(binFrm), 2, as.character), "TRUE", "FALSE"), + expect_error(mkeBin(apply(as.matrix(binFrm), 2, as.complex), "TRUE", "FALSE"), regexp = paste("The input matrix for binary data either needs to be logical \\(then it will be kept as it is\\),", - "or numeric \\(where p and np are used to derive TRUE and FALSE\\).")) + "numeric or character \\(for the latter two, p and np are used to derive TRUE and FALSE\\)\\.")) expect_error(mtcBin(binFm3[, 1], binFm3[, 1], "beuclid"), regexp = "mtcBin: Input columns to the calculation of binary measures must be logical\\.") expect_error(mtcBin(binFrm[, 1], binFrm[, 2], "wrong"), regexp = "mtcBin: Method wrong is not implemented\\.")