From d1533d9cd7f038ef4a103aa37b2c5fa42a43173a Mon Sep 17 00:00:00 2001 From: rmflight Date: Fri, 12 Apr 2024 10:35:03 -0400 Subject: [PATCH] adds rank_order_data and makes the inputs across the three functions all match --- DESCRIPTION | 5 +- NAMESPACE | 1 + R/left_censorship.R | 19 +-- R/rank-ordering.R | 28 ++++- README.Rmd | 1 + README.html | 18 +-- README.md | 13 ++- docs/404.html | 2 +- docs/CODE_OF_CONDUCT.html | 2 +- docs/LICENSE-text.html | 2 +- docs/LICENSE.html | 2 +- docs/articles/ici-kendalltau.html | 12 +- docs/articles/index.html | 2 +- .../articles/testing-for-left-censorship.html | 75 +++++++++++- .../figure-html/fig-yeast-nna-1.png | Bin 0 -> 55703 bytes .../figure-html/fig-yeast-original-1.png | Bin 0 -> 263046 bytes .../figure-html/fig-yeast-reordered-1.png | Bin 0 -> 263918 bytes docs/authors.html | 6 +- docs/index.html | 14 +-- docs/news/index.html | 2 +- docs/pkgdown.yml | 2 +- docs/reference/add_uniform_noise.html | 2 +- docs/reference/calculate_matrix_medians.html | 2 +- docs/reference/cor_matrix_2_long_df.html | 2 +- docs/reference/disable_logging.html | 2 +- docs/reference/enable_logging.html | 2 +- docs/reference/ici_kendalltau.html | 2 +- docs/reference/ici_kendalltau_ref.html | 2 +- docs/reference/ici_kt.html | 2 +- docs/reference/index.html | 12 +- docs/reference/kt_fast.html | 2 +- docs/reference/log_memory.html | 2 +- docs/reference/log_message.html | 2 +- docs/reference/long_df_2_cor_matrix.html | 2 +- docs/reference/missing_dataset.html | 2 +- docs/reference/pairwise_completeness.html | 2 +- docs/reference/rank_order_data.html | 109 ++++++++++++++++++ docs/reference/show_progress.html | 2 +- docs/reference/test_left_censorship.html | 10 +- docs/reference/yeast_missing.html | 105 +++++++++++++++++ docs/search.json | 2 +- docs/sitemap.xml | 6 + man/rank_order_data.Rd | 22 ++++ man/test_left_censorship.Rd | 8 +- tests/testthat/test-left_censorship.R | 10 ++ vignettes/testing-for-left-censorship.Rmd | 53 ++++++++- 46 files changed, 494 
insertions(+), 79 deletions(-) create mode 100644 docs/articles/testing-for-left-censorship_files/figure-html/fig-yeast-nna-1.png create mode 100644 docs/articles/testing-for-left-censorship_files/figure-html/fig-yeast-original-1.png create mode 100644 docs/articles/testing-for-left-censorship_files/figure-html/fig-yeast-reordered-1.png create mode 100644 docs/reference/rank_order_data.html create mode 100644 docs/reference/yeast_missing.html create mode 100644 man/rank_order_data.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 3c13e71..69debb6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: ICIKendallTau Title: Calculates information-content-informed Kendall-tau -Version: 1.1.1 +Version: 1.1.2 Authors@R: c( person( given = c("Robert", "M"), @@ -37,7 +37,8 @@ Suggests: furrr, dplyr, logger, withr, - naniar + ggplot2, + visdat URL: https://moseleybioinformaticslab.github.io/ICIKendallTau/ https://github.com/moseleybioinformaticslab/ICIKendallTau/ BugReports: diff --git a/NAMESPACE b/NAMESPACE index 2d1dcf3..dc81870 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -12,6 +12,7 @@ export(log_memory) export(log_message) export(long_df_2_cor_matrix) export(pairwise_completeness) +export(rank_order_data) export(show_progress) export(test_left_censorship) importFrom(Rcpp,sourceCpp) diff --git a/R/left_censorship.R b/R/left_censorship.R index fd9ffc5..6af75a0 100644 --- a/R/left_censorship.R +++ b/R/left_censorship.R @@ -4,7 +4,7 @@ #' is due to values being below the limit of detection, or coming from a #' left-censored distribution. #' -#' @param in_data matrix or data.frame of numeric data +#' @param data_matrix matrix or data.frame of numeric data #' @param sample_classes which samples are in which class #' @param global_na what represents zero or missing? 
#' @@ -30,25 +30,28 @@ #' #' @export #' @return data.frame of trials / successes, and binom.test result -test_left_censorship = function(in_data, +test_left_censorship = function(data_matrix, sample_classes = NULL, global_na = c(NA, Inf, 0)) { + if (inherits(data_matrix, "data.frame")) { + data_matrix = as.matrix(data_matrix) + } if (is.null(sample_classes)) { - sample_classes = rep("A", ncol(in_data)) + sample_classes = rep("A", ncol(data_matrix)) } - split_indices = split(seq_len(ncol(in_data)), sample_classes) - missing_loc = setup_missing_matrix(in_data, global_na) - in_data_missing = in_data - in_data_missing[missing_loc] = NA + split_indices = split(seq_len(ncol(data_matrix)), sample_classes) + missing_loc = setup_missing_matrix(data_matrix, global_na) + data_matrix_missing = data_matrix + data_matrix_missing[missing_loc] = NA # split the dataset by group split_counts = purrr::imap(split_indices, \(in_split, split_id){ # in_split = split_indices[[1]] # grab the group we want to work with - split_missing = in_data_missing[, in_split, drop = FALSE] + split_missing = data_matrix_missing[, in_split, drop = FALSE] # count the number of missing samples for each feature, # and keep those that have at least one diff --git a/R/rank-ordering.R b/R/rank-ordering.R index 32dbf5f..ccd54f2 100644 --- a/R/rank-ordering.R +++ b/R/rank-ordering.R @@ -1,5 +1,21 @@ +#' Rank order row data +#' +#' Given a data-matrix of numeric data, calculates the rank of each row in each +#' column (feature in sample), gets the median rank across all columns, and +#' returns the original data with missing values set to NA, the reordered data, +#' and a data.frame of the ranks of each feature and the number of missing values. 
+#' +#' @param data_matrix matrix or data.frame of values +#' @param global_na the values to consider as missing +#' +#' @export +#' +#' @returns list with two matrices and a data.frame rank_order_data = function(data_matrix, global_na = c(NA, Inf, 0)) { + if (inherits(data_matrix, "data.frame")) { + data_matrix = as.matrix(data_matrix) + } missing_loc = setup_missing_matrix(data_matrix, global_na) data_matrix_na = data_matrix data_matrix_na[missing_loc] = NA @@ -8,12 +24,16 @@ rank_order_data = function(data_matrix, global_na = c(NA, Inf, 0)) }) sample_ranks = do.call(cbind, sample_ranks) - median_ranks = apply(sample_ranks, 1, median) - rank_order = order(median_ranks, decreasing = TRUE) + median_rank = apply(sample_ranks, 1, median) + + n_na = rowSums(is.na(data_matrix_na)) + rank_order = order(median_rank, decreasing = TRUE) perc_missing = colSums(is.na(data_matrix_na)) / nrow(data_matrix_na) perc_order = order(perc_missing, decreasing = TRUE) - return(original = data_matrix_na, - ordered = data_matrix_na[rank_order, perc_order]) + return(list(original = data_matrix_na, + ordered = data_matrix_na[rank_order, perc_order], + n_na_rank = data.frame(n_na = n_na, + median_rank = median_rank))) } diff --git a/README.Rmd b/README.Rmd index 40e33d5..0630658 100644 --- a/README.Rmd +++ b/README.Rmd @@ -66,6 +66,7 @@ The functions that implement this include: * Otherwise will only use a single core. We've also included a function for testing if the missingness in your data comes from left-censorship, `test_left_censorship`. We walk through creating example data and testing it in the vignette [Testing for Left Censorship](https://moseleybioinformaticslab.github.io/ICIKendallTau/articles/testing-for-left-censorship). +In addition to testing, you can also visualize the missing data pattern by feature rank using the `rank_order_data` function, and use `visdat::vis_miss()` on the original and reordered missing data. 
## Examples diff --git a/README.html b/README.html index b6d099e..8dfafa2 100644 --- a/README.html +++ b/README.html @@ -606,7 +606,7 @@

ICIKendallTau

-

ICIKendallTau status badge

+

ICIKendallTau status badge

You can see the pkgdown site here.

@@ -665,7 +665,11 @@

Package Functions

We’ve also included a function for testing if the missingness in your data comes from left-censorship, test_left_censorship. We walk through creating example data and testing it in the vignette Testing -for Left Censorship.

+for Left Censorship. In addition to testing, you can also visualize +the missing data pattern by feature rank using the +rank_order_data function, and use +visdat::vis_miss() on the original and reordered missing +data.

Examples

The most common case is a large matrix of independent samples (columns) and measured features in each of the samples (i.e. gene @@ -725,10 +729,10 @@

Is It Fast?

times = 5 ) #> Unit: microseconds -#> expr min lq mean median uq max neval -#> cor(x, y, method = "kendall") 11685.244 12730.878 12860.9060 13071.630 13406.514 13410.264 5 -#> ici_kt(x, y, "global") 263.306 268.503 332.1288 274.858 283.589 570.388 5 -#> ici_kt(x2, y2, "global") 14110.743 14322.836 15782.6490 16053.907 16595.979 17829.780 5 +#> expr min lq mean median uq max neval +#> cor(x, y, method = "kendall") 11666.371 11671.671 12405.0886 12084.801 13276.95 13325.649 5 +#> ici_kt(x, y, "global") 253.826 255.717 430.9002 277.985 317.24 1049.733 5 +#> ici_kt(x2, y2, "global") 13405.302 13731.770 15208.7108 14693.928 15415.66 18796.894 5

In the case of 40,000 features, the average time on a modern CPU is 14 milliseconds.

Of course, if you want to use it to calculate Kendall-tau-b without @@ -770,7 +774,7 @@

Many Many Comparisons

#> 3 s4 s4 0 1.0000000 0 1.000000 1.0000000 #> #> $run_time -#> [1] 0.01747489 +#> [1] 0.01783729

Code of Conduct

Please note that the ICIKendallTau project is released with a Contributor Code of Conduct. By contributing to this project, you agree to abide diff --git a/README.md b/README.md index c50e03f..680ba20 100644 --- a/README.md +++ b/README.md @@ -71,6 +71,9 @@ We’ve also included a function for testing if the missingness in your data comes from left-censorship, `test_left_censorship`. We walk through creating example data and testing it in the vignette [Testing for Left Censorship](https://moseleybioinformaticslab.github.io/ICIKendallTau/articles/testing-for-left-censorship). +In addition to testing, you can also visualize the missing data pattern +by feature rank using the `rank_order_data` function, and use +`visdat::vis_miss()` on the original and reordered missing data. ## Examples @@ -142,10 +145,10 @@ microbenchmark( times = 5 ) #> Unit: microseconds -#> expr min lq mean median uq max neval -#> cor(x, y, method = "kendall") 11685.244 12730.878 12860.9060 13071.630 13406.514 13410.264 5 -#> ici_kt(x, y, "global") 263.306 268.503 332.1288 274.858 283.589 570.388 5 -#> ici_kt(x2, y2, "global") 14110.743 14322.836 15782.6490 16053.907 16595.979 17829.780 5 +#> expr min lq mean median uq max neval +#> cor(x, y, method = "kendall") 11666.371 11671.671 12405.0886 12084.801 13276.95 13325.649 5 +#> ici_kt(x, y, "global") 253.826 255.717 430.9002 277.985 317.24 1049.733 5 +#> ici_kt(x2, y2, "global") 13405.302 13731.770 15208.7108 14693.928 15415.66 18796.894 5 ``` In the case of 40,000 features, the average time on a modern CPU is 14 @@ -204,7 +207,7 @@ r_4 #> 3 s4 s4 0 1.0000000 0 1.000000 1.0000000 #> #> $run_time -#> [1] 0.01747489 +#> [1] 0.01783729 ``` ## Code of Conduct diff --git a/docs/404.html b/docs/404.html index fc211cc..82f49f1 100644 --- a/docs/404.html +++ b/docs/404.html @@ -24,7 +24,7 @@ ICIKendallTau - 1.1.0 + 1.1.2 + +

+ + + +
+
+
+ +
+

Given a data-matrix of numeric data, calculates the rank of each row in each +column (feature in sample), gets the median rank across all columns, and +returns the original data with missing values set to NA, the reordered data, +and a data.frame of the ranks of each feature and the number of missing values.

+
+ +
+

Usage

+
rank_order_data(data_matrix, global_na = c(NA, Inf, 0))
+
+ +
+

Arguments

+
data_matrix
+

matrix or data.frame of values

+ + +
global_na
+

the values to consider as missing

+ +
+
+

Value

+ + +

list with two matrices and a data.frame

+
+ +
+ + +
+ + + +
+ + + + + + + diff --git a/docs/reference/show_progress.html b/docs/reference/show_progress.html index 349fe25..537e697 100644 --- a/docs/reference/show_progress.html +++ b/docs/reference/show_progress.html @@ -12,7 +12,7 @@ ICIKendallTau - 1.1.0 + 1.1.2 + + + + + +
+
+
+ +
+

An example dataset from RNA-seq experiment on yeast, created by Gierliński et al., +"Statistical models for RNA-seq data derived from a two-condition 48-replicate experiment", +Bioinformatics, 31, 2015 https://doi.org/10.1093/bioinformatics/btv425.

+
+ +
+

Usage

+
yeast_missing
+
+ +
+

Format

+ +
+

yeast_missing

+ + +

A matrix with 6887 rows (genes) and 96 columns (samples).

+
+ +
+ + +
+ + +
+ + + +
+ + + + + + + diff --git a/docs/search.json b/docs/search.json index 2129380..28e35c0 100644 --- a/docs/search.json +++ b/docs/search.json @@ -1 +1 @@ -[{"path":[]},{"path":"/CODE_OF_CONDUCT.html","id":"our-pledge","dir":"","previous_headings":"","what":"Our Pledge","title":"Contributor Covenant Code of Conduct","text":"members, contributors, leaders pledge make participation community harassment-free experience everyone, regardless age, body size, visible invisible disability, ethnicity, sex characteristics, gender identity expression, level experience, education, socio-economic status, nationality, personal appearance, race, religion, sexual identity orientation. pledge act interact ways contribute open, welcoming, diverse, inclusive, healthy community.","code":""},{"path":"/CODE_OF_CONDUCT.html","id":"our-standards","dir":"","previous_headings":"","what":"Our Standards","title":"Contributor Covenant Code of Conduct","text":"Examples behavior contributes positive environment community include: Demonstrating empathy kindness toward people respectful differing opinions, viewpoints, experiences Giving gracefully accepting constructive feedback Accepting responsibility apologizing affected mistakes, learning experience Focusing best just us individuals, overall community Examples unacceptable behavior include: use sexualized language imagery, sexual attention advances kind Trolling, insulting derogatory comments, personal political attacks Public private harassment Publishing others’ private information, physical email address, without explicit permission conduct reasonably considered inappropriate professional setting","code":""},{"path":"/CODE_OF_CONDUCT.html","id":"enforcement-responsibilities","dir":"","previous_headings":"","what":"Enforcement Responsibilities","title":"Contributor Covenant Code of Conduct","text":"Community leaders responsible clarifying enforcing standards acceptable behavior take appropriate fair corrective action response behavior deem 
inappropriate, threatening, offensive, harmful. Community leaders right responsibility remove, edit, reject comments, commits, code, wiki edits, issues, contributions aligned Code Conduct, communicate reasons moderation decisions appropriate.","code":""},{"path":"/CODE_OF_CONDUCT.html","id":"scope","dir":"","previous_headings":"","what":"Scope","title":"Contributor Covenant Code of Conduct","text":"Code Conduct applies within community spaces, also applies individual officially representing community public spaces. Examples representing community include using official e-mail address, posting via official social media account, acting appointed representative online offline event.","code":""},{"path":"/CODE_OF_CONDUCT.html","id":"enforcement","dir":"","previous_headings":"","what":"Enforcement","title":"Contributor Covenant Code of Conduct","text":"Instances abusive, harassing, otherwise unacceptable behavior may reported community leaders responsible enforcement rflight79@gmail.com. complaints reviewed investigated promptly fairly. community leaders obligated respect privacy security reporter incident.","code":""},{"path":"/CODE_OF_CONDUCT.html","id":"enforcement-guidelines","dir":"","previous_headings":"","what":"Enforcement Guidelines","title":"Contributor Covenant Code of Conduct","text":"Community leaders follow Community Impact Guidelines determining consequences action deem violation Code Conduct:","code":""},{"path":"/CODE_OF_CONDUCT.html","id":"id_1-correction","dir":"","previous_headings":"Enforcement Guidelines","what":"1. Correction","title":"Contributor Covenant Code of Conduct","text":"Community Impact: Use inappropriate language behavior deemed unprofessional unwelcome community. Consequence: private, written warning community leaders, providing clarity around nature violation explanation behavior inappropriate. 
public apology may requested.","code":""},{"path":"/CODE_OF_CONDUCT.html","id":"id_2-warning","dir":"","previous_headings":"Enforcement Guidelines","what":"2. Warning","title":"Contributor Covenant Code of Conduct","text":"Community Impact: violation single incident series actions. Consequence: warning consequences continued behavior. interaction people involved, including unsolicited interaction enforcing Code Conduct, specified period time. includes avoiding interactions community spaces well external channels like social media. Violating terms may lead temporary permanent ban.","code":""},{"path":"/CODE_OF_CONDUCT.html","id":"id_3-temporary-ban","dir":"","previous_headings":"Enforcement Guidelines","what":"3. Temporary Ban","title":"Contributor Covenant Code of Conduct","text":"Community Impact: serious violation community standards, including sustained inappropriate behavior. Consequence: temporary ban sort interaction public communication community specified period time. public private interaction people involved, including unsolicited interaction enforcing Code Conduct, allowed period. Violating terms may lead permanent ban.","code":""},{"path":"/CODE_OF_CONDUCT.html","id":"id_4-permanent-ban","dir":"","previous_headings":"Enforcement Guidelines","what":"4. Permanent Ban","title":"Contributor Covenant Code of Conduct","text":"Community Impact: Demonstrating pattern violation community standards, including sustained inappropriate behavior, harassment individual, aggression toward disparagement classes individuals. Consequence: permanent ban sort public interaction within community.","code":""},{"path":"/CODE_OF_CONDUCT.html","id":"attribution","dir":"","previous_headings":"","what":"Attribution","title":"Contributor Covenant Code of Conduct","text":"Code Conduct adapted Contributor Covenant, version 2.0, available https://www.contributor-covenant.org/version/2/0/ code_of_conduct.html. 
Community Impact Guidelines inspired Mozilla’s code conduct enforcement ladder. answers common questions code conduct, see FAQ https://www.contributor-covenant.org/faq. Translations available https:// www.contributor-covenant.org/translations.","code":""},{"path":"/LICENSE.html","id":null,"dir":"","previous_headings":"","what":"MIT License","title":"MIT License","text":"Copyright (c) Robert M Flight & Hunter NB Moseley Permission hereby granted, free charge, person obtaining copy software associated documentation files (“Software”), deal Software without restriction, including without limitation rights use, copy, modify, merge, publish, distribute, sublicense, /sell copies Software, permit persons Software furnished , subject following conditions: copyright notice permission notice shall included copies substantial portions Software. SOFTWARE PROVIDED “”, WITHOUT WARRANTY KIND, EXPRESS IMPLIED, INCLUDING LIMITED WARRANTIES MERCHANTABILITY, FITNESS PARTICULAR PURPOSE NONINFRINGEMENT. EVENT SHALL AUTHORS COPYRIGHT HOLDERS LIABLE CLAIM, DAMAGES LIABILITY, WHETHER ACTION CONTRACT, TORT OTHERWISE, ARISING , CONNECTION SOFTWARE USE DEALINGS SOFTWARE.","code":""},{"path":"/articles/ici-kendalltau.html","id":"problem","dir":"Articles","previous_headings":"","what":"Problem","title":"Information-Content-Informed Kendall Tau Correlation","text":"handle missing data (.e. NA’s) calculating correlation two variables. However, whether observation made semi-quantitative information many analytical measurements sensitivity limits. .e. many cases, missing observations “missing--random”, “missing---random” due falling detection limit. 
cases, NA informative.","code":""},{"path":"/articles/ici-kendalltau.html","id":"approach","dir":"Articles","previous_headings":"","what":"Approach","title":"Information-Content-Informed Kendall Tau Correlation","text":"Kendall Tau Correlation coefficient calculates correlation based number concordant discordant pairs: \\(\\tau = \\frac{ | pairs_{concordant} | - | pairs_{discordant} |}{\\binom{n}{2}}\\) pair two x-y data points. \\(x_i > x_j\\) \\(y_i > y_j\\) \\(x_i < x_j\\) \\(y_i < y_j\\) \\(x_i > x_j\\) \\(y_i < y_j\\) \\(x_i < x_j\\) \\(y_i > y_j\\) definitions can expanded handle missing observations: \\(x_i > x_j\\) \\(y_i > y_j\\) \\(x_i < x_j\\) \\(y_i < y_j\\) \\(x_i > x_j\\) \\(y_i \\& !y_j\\) \\(x_i < x_j\\) \\(!y_i \\& y_j\\) \\(x_i \\& !x_j\\) \\(y_i > y_j\\) \\(!x_i \\& x_j\\) \\(y_i < y_j\\) \\(x_i \\& !x_j\\) \\(y_i \\& !y_j\\) (used local perspective version) \\(x_i \\& x_j\\) \\(!y_i \\& y_j\\) (used local perspective version) \\(x_i > x_j\\) \\(y_i < y_j\\) \\(x_i < x_j\\) \\(y_i > y_j\\) \\(x_i > x_j\\) \\(!y_i \\& y_j\\) \\(x_i < x_j\\) \\(y_i \\& !y_j\\) \\(x_i \\& !x_j\\) \\(y_i < y_j\\) \\(!x_i \\& x_j\\) \\(y_i > y_j\\) \\(x_i \\& !x_j\\) \\(!y_i \\& y_j\\) (used local perspective version) \\(!x_i \\& x_j\\) \\(y_i \\& !y_j\\) (used local perspective version) Also data points missing x y values naturally reduce strength correlation value, since can neither concordant discordant another (NA, NA) pair, impact denominator. equivalent removing last two concordant discordant pair tests.","code":""},{"path":"/articles/ici-kendalltau.html","id":"handling-tied-values","dir":"Articles","previous_headings":"","what":"Handling Tied Values","title":"Information-Content-Informed Kendall Tau Correlation","text":"base Kendall tau correlation must adjusted handle tied values, ie. tau-b version equation. 
\\[\\tau = \\frac{ | pairs_{concordant} | - | pairs_{discordant} |}{\\sqrt{ ( n_0 - n_{xtie} ) ( n_0 - n_{ytie} )}} \\] : \\(n_0 = \\frac{n \\left ( n - 1 \\right )}{2}\\) \\(n_{xtie} = \\sum_{}^{} \\frac{t_{xi} \\left ( t_{xi} - 1 \\right )}{2}\\) \\(n_{ytie} = \\sum_{}^{} \\frac{t_{yi} \\left ( t_{yi} - 1 \\right )}{2}\\) \\(t_{xi}\\) - size ith group tied x values. \\(t_{yi}\\) - size ith group tied y values. local perspective, number NAs x y can treated group tied values calculation \\(n_{xtie}\\) \\(n_{ytie}\\), respectively.","code":""},{"path":"/articles/ici-kendalltau.html","id":"scaling-by-the-correlation-with-the-highest-information-content","dir":"Articles","previous_headings":"","what":"Scaling by the correlation with the highest information content","title":"Information-Content-Informed Kendall Tau Correlation","text":"generating correlation matrix (heatmap) large analytical datasets, number observations common can become quite low two variables. becomes advantageous scale pair variables highest information content. One objective scaling factor highest possible absolute correlation maximum information content observed across dataset, dividing maximum possible absolute correlation scale whole dataset appropriately. \\[maxcorr = \\frac{\\binom{n-m}{2} + n * m}{\\binom{n-m}{2} + n * m + \\binom{m}{2}}\\] : Choose two variables least number missing values across dataset. n length variables. formula based perfect correlation given number (NA,NA) pairs added.","code":""},{"path":"/articles/ici-kendalltau.html","id":"functions","dir":"Articles","previous_headings":"","what":"Functions","title":"Information-Content-Informed Kendall Tau Correlation","text":"functions implement include: option perspective control NA values influence ties. comparing multiple samples, likely want use perspective = \"global\". Rows features, columns samples. library(furrr) plan(multiprocess) Otherwise use single core. 
’ve also included function testing missingness data comes left-censorship, test_left_censorship. walk creating example data testing vignette Testing Left Censorship.","code":""},{"path":"/articles/ici-kendalltau.html","id":"implementation","dir":"Articles","previous_headings":"","what":"Implementation","title":"Information-Content-Informed Kendall Tau Correlation","text":"turns , think really hard, truly necessary replace missing values vector value smaller minimum value one. local version, first remove common missing values vector. C++ implementation explicitly can speed, instead wrapping {stats::cor} function. also use double merge-sort algorithm, translating {scipy:: ::stats::kendalltau} function C++ using {Rcpp}.","code":""},{"path":"/articles/ici-kendalltau.html","id":"speed","dir":"Articles","previous_headings":"","what":"Speed","title":"Information-Content-Informed Kendall Tau Correlation","text":"","code":"x = rnorm(1000) y = rnorm(1000) library(microbenchmark) microbenchmark( cor(x, y, method = \"kendall\"), ici_kt(x, y, \"global\"), times = 5 ) #> Unit: microseconds #> expr min lq mean median #> cor(x, y, method = \"kendall\") 11716.612 12670.75 13230.1676 13124.263 #> ici_kt(x, y, \"global\") 231.868 233.36 276.8498 235.229 #> uq max neval #> 13485.116 15154.096 5 #> 268.721 415.071 5 all.equal(ici_kt(x, y, \"global\")[[1]], cor(x, y, method = \"kendall\")) #> [1] TRUE"},{"path":"/articles/ici-kendalltau.html","id":"running-many","dir":"Articles","previous_headings":"","what":"Running Many","title":"Information-Content-Informed Kendall Tau Correlation","text":"Just like R’s cor function, can also calculate correlations many variables. 
Let’s make fake data try .","code":"set.seed(1234) s1 = sort(rnorm(1000, mean = 100, sd = 10)) s2 = s1 + 10 s2[sample(length(s1), 100)] = s1[1:100] s3 = s1 s3[c(1:10, sample(length(s1), 5))] = NA matrix_1 = cbind(s1, s2, s3) r_1 = ici_kendalltau(matrix_1) r_1$cor #> s1 s2 s3 #> s1 1.0000000 0.8049209 0.9907488 #> s2 0.8049209 1.0000000 0.7956652 #> s3 0.9907488 0.7956652 0.9850000"},{"path":"/articles/ici-kendalltau.html","id":"parallelism","dir":"Articles","previous_headings":"","what":"Parallelism","title":"Information-Content-Informed Kendall Tau Correlation","text":"{future} {furrr} packages installed, also possible split set matrix comparisons across compute resources multiprocessing engine registered {future}.","code":"library(furrr) future::plan(multicore, workers = 4) r_2 = ici_kendalltau(matrix_1)"},{"path":"/articles/ici-kendalltau.html","id":"many-many-comparisons","dir":"Articles","previous_headings":"","what":"Many Many Comparisons","title":"Information-Content-Informed Kendall Tau Correlation","text":"case hundreds thousands comparisons done, result matrices can become , large, require lots memory storage. also inefficient, lower upper triangular components stored. alternative storage format data.frame, single row comparison performed. actually results stored internally, converted matrix form requested (default). 
keep data.frame output, add argument return_matrix=FALSE call ici_kendalltau.","code":"r_3 = ici_kendalltau(matrix_1, return_matrix = FALSE) r_3$cor #> s1 s2 core raw pvalue taumax cor #> 1 s1 s2 1 0.8049209 0 1.0000000 0.8049209 #> 2 s1 s3 1 0.9907488 0 0.9998949 0.9907488 #> 3 s2 s3 1 0.7956652 0 0.9998949 0.7956652 #> 4 s1 s1 0 1.0000000 0 1.0000000 1.0000000 #> 5 s2 s2 0 1.0000000 0 1.0000000 1.0000000 #> 6 s3 s3 0 0.9850000 0 1.0000000 0.9850000"},{"path":"/articles/ici-kendalltau.html","id":"logging","dir":"Articles","previous_headings":"","what":"Logging","title":"Information-Content-Informed Kendall Tau Correlation","text":"possible log steps done much memory used (Linux least) correlations calculated. can useful running large sets correlations making sure much memory isn’t used, example. enable logging, {logger} package must installed. log_file supplied, one created current date time. default, ici_kendalltau also shows progress messages, want turn , can :","code":"enable_logging() enable_logging(\"/tmp/my_ici_run.log\") show_progress(FALSE)"},{"path":"/articles/testing-for-left-censorship.html","id":"why","dir":"Articles","previous_headings":"","what":"Why?","title":"Testing for Left Censorship","text":"{ICIKendallTau} specific assumption, missing values largely due limit detection, result left-censorship. Therefore, used missing values left-censorship. Ideally, nice way test .","code":""},{"path":"/articles/testing-for-left-censorship.html","id":"strategy","dir":"Articles","previous_headings":"","what":"Strategy","title":"Testing for Left Censorship","text":"test , following (implemented test_left_censorship). feature missing one samples class samples, check non-missing entries sample’s median values. count entries sample medians successes, total number possible entries number trials binomial test, aggregating across features missing value. 
can perform one-tailed binomial test expectation successes greater 0.5.","code":""},{"path":"/articles/testing-for-left-censorship.html","id":"fake-data","dir":"Articles","previous_headings":"","what":"Fake Data","title":"Testing for Left Censorship","text":"start, need make fake data can evaluate test . make smallish dataset, 1000 features across 20 samples, 80% missing values due left-censored. sort initial data know can easily put missingness due left-censoring. also use log-normal distribution initially, just .","code":"set.seed(1234) n_feature = 1000 n_sample = 20 n_miss = 100 n_low = 80 test_dataset = rlnorm(n_feature, 10, 1) test_dataset = sort(test_dataset) noisy_dataset = add_uniform_noise(log(test_dataset), n_sample, 0.1) sample_medians = calculate_matrix_medians(noisy_dataset)"},{"path":"/articles/testing-for-left-censorship.html","id":"add-missingness","dir":"Articles","previous_headings":"","what":"Add Missingness","title":"Testing for Left Censorship","text":"Now can sample low high indices add missingness, well samples add . 
can actually visualize missingness using naniar package.","code":"low_indices = sample(seq_len(300), n_low) hi_indices = sample(seq(800, 1000), n_miss - n_low) all_indices = c(low_indices, hi_indices) sample_indices = sample(n_sample, n_miss, replace = TRUE) missing_dataset = noisy_dataset for (i_loc in seq_along(all_indices)) { missing_dataset[all_indices[i_loc], sample_indices[i_loc]] = NA } naniar::vis_miss(as.data.frame(missing_dataset))"},{"path":"/articles/testing-for-left-censorship.html","id":"test","dir":"Articles","previous_headings":"","what":"Test","title":"Testing for Left Censorship","text":"Now can actually test missingness entries.","code":"missing_test = test_left_censorship(missing_dataset) missing_test #> $values #> trials success class #> 1 1900 1520 A #> #> $binomial_test #> #> Exact binomial test #> #> data: total_success and total_trials #> number of successes = 1520, number of trials = 1900, p-value < 2.2e-16 #> alternative hypothesis: true probability of success is greater than 0.5 #> 95 percent confidence interval: #> 0.7843033 1.0000000 #> sample estimates: #> probability of success #> 0.8"},{"path":"/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Robert M Flight. Author, maintainer. Hunter NB Moseley. Author.","code":""},{"path":"/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Flight RM, Moseley HN (2024). ICIKendallTau: Calculates information-content-informed Kendall-tau. 
R package version 1.1.0, https://moseleybioinformaticslab.github.io/ICIKendallTau/https://github.com/moseleybioinformaticslab/ICIKendallTau/.","code":"@Manual{, title = {ICIKendallTau: Calculates information-content-informed Kendall-tau}, author = {Robert M Flight and Hunter NB Moseley}, year = {2024}, note = {R package version 1.1.0}, url = {https://moseleybioinformaticslab.github.io/ICIKendallTau/ https://github.com/moseleybioinformaticslab/ICIKendallTau/}, }"},{"path":"/index.html","id":"icikendalltau","dir":"","previous_headings":"","what":"Calculates information-content-informed Kendall-tau","title":"Calculates information-content-informed Kendall-tau","text":"can see pkgdown site .","code":""},{"path":"/index.html","id":"installation","dir":"","previous_headings":"","what":"Installation","title":"Calculates information-content-informed Kendall-tau","text":"can install current version ICIKendallTau via GitHub: can also install Windows Mac binaries using r-universe:","code":"remotes::install_github(\"MoseleyBioinformaticsLab/ICIKendallTau\") options(repos = c( moseleybioinformaticslab = 'https://moseleybioinformaticslab.r-universe.dev', CRAN = \"https://cloud.r-project.org\")) install.packages(\"ICIKendallTau\")"},{"path":"/index.html","id":"problem","dir":"","previous_headings":"","what":"Problem","title":"Calculates information-content-informed Kendall-tau","text":"handle missing data (.e. NA’s) calculating correlation two variables. However, whether observation present missing semi-quantitative information many analytical measurements sensitivity limits. .e. many cases, missing observations “missing--random”, “missing---random” due falling detection limit. cases, NA informative. Therefore, analytical measurements (gene expression, proteomics, metabolomics), missing measurements included, contribute correlation. 
want read solve problem, see package vignette.","code":""},{"path":"/index.html","id":"package-functions","dir":"","previous_headings":"","what":"Package Functions","title":"Calculates information-content-informed Kendall-tau","text":"functions implement include: option perspective control NA values influence ties. comparing samples, likely want use perspective = \"global\". Rows features, columns samples. library(furrr) plan(multiprocess) Otherwise use single core. ’ve also included function testing missingness data comes left-censorship, test_left_censorship. walk creating example data testing vignette Testing Left Censorship.","code":""},{"path":"/index.html","id":"examples","dir":"","previous_headings":"","what":"Examples","title":"Calculates information-content-informed Kendall-tau","text":"common case large matrix independent samples (columns) measured features samples (.e. gene expression). make artificial data show correlation changes introduce missingness. Now introduce missing values low end one. just simplest thing introduce NA values bottom set.","code":"set.seed(1234) library(ICIKendallTau) s1 = sort(rnorm(1000, mean = 100, sd = 10)) s2 = s1 + 10 matrix_1 = cbind(s1, s2) r_1 = ici_kendalltau(matrix_1) r_1$cor #> s1 s2 #> s1 1 1 #> s2 1 1 s3 = s1 s3[sample(100, 50)] = NA s4 = s2 s4[sample(100, 50)] = NA matrix_2 = cbind(s3, s4) r_2 = ici_kendalltau(matrix_2) r_2$cor #> s3 s4 #> s3 1.0000000 0.9944616 #> s4 0.9944616 1.0000000"},{"path":"/index.html","id":"is-it-fast","dir":"","previous_headings":"","what":"Is It Fast?","title":"Calculates information-content-informed Kendall-tau","text":"C++ code implementation (thanks {Rcpp}!) based SciPy implementation, uses two merge sorts ranks vector, looks differences . fastest method know , complexity O(nlogn). naive way computing , explicitly examines pairs, complexity n^2. implementation compared {pcaPP::cov.fk} function, use {Rcpp} inefficient copying vectors makes 3X slower one. honestly isn’t bad. 
case 40,000 features, average time modern CPU 14 milliseconds. course, want use calculate Kendall-tau-b without incorporating missingness, can just fine well. also provide kt_fast function, want something treats NA values similarly stats::cor.","code":"library(microbenchmark) x = rnorm(1000) y = rnorm(1000) x2 = rnorm(40000) y2 = rnorm(40000) library(microbenchmark) microbenchmark( cor(x, y, method = \"kendall\"), ici_kt(x, y, \"global\"), ici_kt(x2, y2, \"global\"), times = 5 ) #> Unit: microseconds #> expr min lq mean median uq max neval #> cor(x, y, method = \"kendall\") 11685.244 12730.878 12860.9060 13071.630 13406.514 13410.264 5 #> ici_kt(x, y, \"global\") 263.306 268.503 332.1288 274.858 283.589 570.388 5 #> ici_kt(x2, y2, \"global\") 14110.743 14322.836 15782.6490 16053.907 16595.979 17829.780 5 k_tau = ici_kt(x, y, \"global\") all.equal(k_tau[[1]] ,cor(x, y, method = \"kendall\")) #> [1] TRUE k_tau_fast = kt_fast(x, y) k_tau_fast #> tau pvalue #> -0.003411411 0.871672260"},{"path":"/index.html","id":"parallelism","dir":"","previous_headings":"","what":"Parallelism","title":"Calculates information-content-informed Kendall-tau","text":"{future} {furrr} packages installed, also possible split set matrix comparisons across compute resources multiprocessing engine registered {future}.","code":"library(furrr) future::plan(multicore, workers = 4) r_3 = ici_kendalltau(matrix_2)"},{"path":"/index.html","id":"many-many-comparisons","dir":"","previous_headings":"","what":"Many Many Comparisons","title":"Calculates information-content-informed Kendall-tau","text":"case hundreds thousands comparisons done, result matrices can become , large, require lots memory storage. also inefficient, lower upper triangular components stored. alternative storage format data.frame, single row comparison performed. actually results stored internally, converted matrix form requested (default). 
keep data.frame output, add argument return_matrix=FALSE call ici_kendalltau.","code":"r_4 = ici_kendalltau(matrix_2, return_matrix = FALSE) r_4 #> $cor #> s1 s2 core raw pvalue taumax cor #> 1 s3 s4 1 0.9924359 0 0.997963 0.9944616 #> 2 s3 s3 0 1.0000000 0 1.000000 1.0000000 #> 3 s4 s4 0 1.0000000 0 1.000000 1.0000000 #> #> $run_time #> [1] 0.01747489"},{"path":"/index.html","id":"code-of-conduct","dir":"","previous_headings":"","what":"Code of Conduct","title":"Calculates information-content-informed Kendall-tau","text":"Please note ICIKendallTau project released Contributor Code Conduct. contributing project, agree abide terms.","code":""},{"path":"/reference/add_uniform_noise.html","id":null,"dir":"Reference","previous_headings":"","what":"Add uniform noise — add_uniform_noise","title":"Add uniform noise — add_uniform_noise","text":"Adds uniform noise values, generating replicates noise added original.","code":""},{"path":"/reference/add_uniform_noise.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Add uniform noise — add_uniform_noise","text":"","code":"add_uniform_noise(value, n_rep, sd, use_zero = FALSE)"},{"path":"/reference/add_uniform_noise.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Add uniform noise — add_uniform_noise","text":"value single vector numeric values n_rep number replicates make (numeric). Default 1. 
sd standard deviation data use_zero logical, returned values around zero ?","code":""},{"path":"/reference/add_uniform_noise.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Add uniform noise — add_uniform_noise","text":"numeric matrix","code":""},{"path":"/reference/calculate_matrix_medians.html","id":null,"dir":"Reference","previous_headings":"","what":"Calculate matrix medians — calculate_matrix_medians","title":"Calculate matrix medians — calculate_matrix_medians","text":"Given matrix data, calculates median value column row.","code":""},{"path":"/reference/calculate_matrix_medians.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Calculate matrix medians — calculate_matrix_medians","text":"","code":"calculate_matrix_medians(in_matrix, use = \"col\", ...)"},{"path":"/reference/calculate_matrix_medians.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Calculate matrix medians — calculate_matrix_medians","text":"in_matrix numeric matrix values use character \"col\" \"row\" defining columns rows ... 
extra parameters median function","code":""},{"path":"/reference/calculate_matrix_medians.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Calculate matrix medians — calculate_matrix_medians","text":"numeric","code":""},{"path":"/reference/cor_matrix_2_long_df.html","id":null,"dir":"Reference","previous_headings":"","what":"convert matrix to data.frame — cor_matrix_2_long_df","title":"convert matrix to data.frame — cor_matrix_2_long_df","text":"Given square correlation matrix, converts long data.frame, three columns.","code":""},{"path":"/reference/cor_matrix_2_long_df.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"convert matrix to data.frame — cor_matrix_2_long_df","text":"","code":"cor_matrix_2_long_df(in_matrix)"},{"path":"/reference/cor_matrix_2_long_df.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"convert matrix to data.frame — cor_matrix_2_long_df","text":"in_matrix correlation matrix","code":""},{"path":"/reference/cor_matrix_2_long_df.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"convert matrix to data.frame — cor_matrix_2_long_df","text":"data.frame","code":""},{"path":"/reference/cor_matrix_2_long_df.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"convert matrix to data.frame — cor_matrix_2_long_df","text":"data.frame contains three columns: s1: first entry comparison s2: second entry comparison cor: correlation value","code":""},{"path":"/reference/disable_logging.html","id":null,"dir":"Reference","previous_headings":"","what":"turn logging off — disable_logging","title":"turn logging off — disable_logging","text":"may good reasons turn logging turned . 
basically tells package logger available.","code":""},{"path":"/reference/disable_logging.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"turn logging off — disable_logging","text":"","code":"disable_logging()"},{"path":"/reference/enable_logging.html","id":null,"dir":"Reference","previous_headings":"","what":"turn logging on — enable_logging","title":"turn logging on — enable_logging","text":"Choose enable logging, specific file desired.","code":""},{"path":"/reference/enable_logging.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"turn logging on — enable_logging","text":"","code":"enable_logging(log_file = NULL, memory = FALSE)"},{"path":"/reference/enable_logging.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"turn logging on — enable_logging","text":"log_file file log memory provide memory logging ? available Linux MacOS","code":""},{"path":"/reference/enable_logging.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"turn logging on — enable_logging","text":"Uses logger package hood, suggested dependencies. logging enabled nice see things starting stopping, exactly done, without needing write messages console. especially useful getting errors, really see , can add \"memory\" logging see running memory. 
Default log file pattern: YYYY.MM.DD.HH.MM.SS_ICIKendallTau_run.log","code":""},{"path":"/reference/ici_kendalltau.html","id":null,"dir":"Reference","previous_headings":"","what":"information-content-informed kendall tau — ici_kendalltau","title":"information-content-informed kendall tau — ici_kendalltau","text":"Given data-matrix, computes information-theoretic Kendall-tau-b samples.","code":""},{"path":"/reference/ici_kendalltau.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"information-content-informed kendall tau — ici_kendalltau","text":"","code":"ici_kendalltau( data_matrix, global_na = c(NA, Inf, 0), perspective = \"global\", scale_max = TRUE, diag_good = TRUE, include_only = NULL, check_timing = FALSE, return_matrix = TRUE )"},{"path":"/reference/ici_kendalltau.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"information-content-informed kendall tau — ici_kendalltau","text":"data_matrix matrix data.frame values, samples columns, features rows global_na numeric vector defines globally, treated NA? perspective treat missing data denominator ties, character scale_max logical, everything scaled compared maximum correlation? diag_good logical, diagonal entries reflect many entries sample \"good\"? include_only run correlations include members (vector) combinations (list data.frame) check_timing logical determine try estimate run time full dataset? 
(default FALSE) return_matrix logical, data.frame matrix result returned?","code":""},{"path":"/reference/ici_kendalltau.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"information-content-informed kendall tau — ici_kendalltau","text":"list cor, raw, pval, taumax","code":""},{"path":"/reference/ici_kendalltau.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"information-content-informed kendall tau — ici_kendalltau","text":"details, see vignette vignette(\"ici-kendalltau\", package = \"ICIKendallTau\")) default global_na includes values data replace NA Kendall-tau calculation. default global_na = c(NA, Inf, 0). want replace something 0, example, might use global_na = c(NA, Inf, -2), values -2 replaced instead 0. check_timing = TRUE, 5 random pairwise comparisons run generate timings single core, estimates long full set take calculated. data returned data.frame, low side, provide good idea long data take. Returned list containing matrices : cor: scaled correlations raw: raw kendall-tau correlations pval: p-values taumax: theoretical maximum kendall-tau value possible Eventually, plan provide two parameters replacing values, feature_na feature specific NA values sample_na sample specific NA values. 
want know missing values data possibly due left-censorship, recommend testing hypothesis test_left_censorship() first.","code":""},{"path":[]},{"path":"/reference/ici_kendalltau.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"information-content-informed kendall tau — ici_kendalltau","text":"","code":"if (FALSE) { # not run set.seed(1234) s1 = sort(rnorm(1000, mean = 100, sd = 10)) s2 = s1 + 10 matrix_1 = cbind(s1, s2) r_1 = ici_kendalltau(matrix_1) r_1$cor # s1 s2 # s1 1 1 # s2 1 1 names(r_1) # \"cor\", \"raw\", \"pval\", \"taumax\", \"keep\", \"run_time\" s3 = s1 s3[sample(100, 50)] = NA s4 = s2 s4[sample(100, 50)] = NA matrix_2 = cbind(s3, s4) r_2 = ici_kendalltau(matrix_2) r_2$cor # s3 s4 # s3 1.0000000 0.9944616 # s4 0.9944616 1.0000000 # using include_only set.seed(1234) x = t(matrix(rnorm(5000), nrow = 100, ncol = 50)) colnames(x) = paste0(\"s\", seq(1, nrow(x))) # only calculate correlations of other columns with \"s1\" include_s1 = \"s1\" s1_only = ici_kendalltau(x, include_only = include_s1) # include s1 and s3 things both include_s1s3 = c(\"s1\", \"s3\") s1s3_only = ici_kendalltau(x, include_only = include_s1s3) # only specify certain pairs either as a list include_pairs = list(g1 = \"s1\", g2 = c(\"s2\", \"s3\")) s1_other = ici_kendalltau(x, include_only = include_pairs) # or a data.frame include_df = as.data.frame(list(g1 = \"s1\", g2 = c(\"s2\", \"s3\"))) s1_df = ici_kendalltau(x, include_only = include_df) }"},{"path":"/reference/ici_kendalltau_ref.html","id":null,"dir":"Reference","previous_headings":"","what":"information-content-informed kendall tau — ici_kendalltau_ref","title":"information-content-informed kendall tau — ici_kendalltau_ref","text":"Given data-matrix, computes information-content-informed (ICI) Kendall-tau-b samples.","code":""},{"path":"/reference/ici_kendalltau_ref.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"information-content-informed 
kendall tau — ici_kendalltau_ref","text":"","code":"ici_kendalltau_ref( data_matrix, global_na = c(NA, Inf, 0), zero_value = 0, perspective = \"global\", scale_max = TRUE, diag_good = TRUE, progress = FALSE )"},{"path":"/reference/ici_kendalltau_ref.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"information-content-informed kendall tau — ici_kendalltau_ref","text":"data_matrix samples columns, features rows global_na values treated missing (NA)? zero_value actual zero value? perspective treat missing data denominator ties, see details scale_max everything scaled compared maximum correlation? diag_good diagonal entries reflect many entries sample \"good\"? progress progress displayed.","code":""},{"path":"/reference/ici_kendalltau_ref.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"information-content-informed kendall tau — ici_kendalltau_ref","text":"numeric","code":""},{"path":"/reference/ici_kendalltau_ref.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"information-content-informed kendall tau — ici_kendalltau_ref","text":"details, see ICI-Kendall-tau vignette: vignette(\"ici-kendalltau\", package = \"ICIKendallTau\")","code":""},{"path":"/reference/ici_kt.html","id":null,"dir":"Reference","previous_headings":"","what":"Calculates ici-kendall-tau — ici_kt","title":"Calculates ici-kendall-tau — ici_kt","text":"Calculates ici-kendall-tau","code":""},{"path":"/reference/ici_kt.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Calculates ici-kendall-tau — ici_kt","text":"","code":"ici_kt( x, y, perspective = \"local\", alternative = \"two.sided\", continuity = FALSE, output = \"simple\" )"},{"path":"/reference/ici_kt.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Calculates ici-kendall-tau — ici_kt","text":"x numeric vector y numeric vector perspective consider 
\"local\" \"global\" perspective? alternative alternative p-value test? continuity logical: true, continuity correction used output used control reporting values debugging","code":""},{"path":"/reference/ici_kt.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Calculates ici-kendall-tau — ici_kt","text":"kendall tau correlation, p-value, max-correlation","code":""},{"path":"/reference/ici_kt.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Calculates ici-kendall-tau — ici_kt","text":"Calculates information-content-informed Kendall-tau correlation measure. correlation based concordant discordant ranked pairs, like Kendall-tau, also includes missing values (NA). Missing values assumed primarily due lack detection due instrumental sensitivity, therefore encode information. details see ICI-Kendall-tau vignette: browseVignettes(\"ICIKendallTau\")","code":""},{"path":"/reference/ici_kt.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Calculates ici-kendall-tau — ici_kt","text":"","code":"x = sort(rnorm(100)) y = x + 1 y2 = y y2[1:10] = NA ici_kt(x, y) #> tau pvalue tau_max #> 1.000000e+00 3.480281e-49 1.000000e+00 ici_kt(x, y2, \"global\") #> tau pvalue tau_max #> 9.954442e-01 2.228308e-48 9.954442e-01 ici_kt(x, y2) #> tau pvalue tau_max #> 9.954442e-01 2.228308e-48 9.954442e-01"},{"path":"/reference/kt_fast.html","id":null,"dir":"Reference","previous_headings":"","what":"fast kendall tau — kt_fast","title":"fast kendall tau — kt_fast","text":"Uses underlying c++ implementation ici_kt provide fast version Kendall-tau correlation.","code":""},{"path":"/reference/kt_fast.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"fast kendall tau — kt_fast","text":"","code":"kt_fast(x, y = NULL, use = \"everything\", return_matrix = 
TRUE)"},{"path":"/reference/kt_fast.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"fast kendall tau — kt_fast","text":"x numeric vector, matrix, data frame. y NULL (default) vector. use optional character string giving method computing correlations presence missing values. must (abbreviation ) one strings \"everything\", \".obs\", \"complete.obs\", \"pairwise.complete.obs\". return_matrix matrices values returned, long data.frame","code":""},{"path":"/reference/kt_fast.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"fast kendall tau — kt_fast","text":"named vector list matrices.","code":""},{"path":"/reference/kt_fast.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"fast kendall tau — kt_fast","text":"Although interface mostly identical built-stats::cor() method, differences. providing x y, assumed single vectors. NA values present, function error, either remove return NA, depending option. \"na..complete\" valid option use. 
named vector named list matrices returned, tau pvalue values.","code":""},{"path":"/reference/log_memory.html","id":null,"dir":"Reference","previous_headings":"","what":"log memory usage — log_memory","title":"log memory usage — log_memory","text":"Logs amount memory used log file available, generating warnings amount RAM hits zero.","code":""},{"path":"/reference/log_memory.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"log memory usage — log_memory","text":"","code":"log_memory()"},{"path":"/reference/log_message.html","id":null,"dir":"Reference","previous_headings":"","what":"log messages — log_message","title":"log messages — log_message","text":"log_appender available, logs given message info level.","code":""},{"path":"/reference/log_message.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"log messages — log_message","text":"","code":"log_message(message_string)"},{"path":"/reference/log_message.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"log messages — log_message","text":"message_string string put message","code":""},{"path":"/reference/long_df_2_cor_matrix.html","id":null,"dir":"Reference","previous_headings":"","what":"convert data.frame to matrix — long_df_2_cor_matrix","title":"convert data.frame to matrix — long_df_2_cor_matrix","text":"Given long data.frame, converts possibly square correlation matrix","code":""},{"path":"/reference/long_df_2_cor_matrix.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"convert data.frame to matrix — long_df_2_cor_matrix","text":"","code":"long_df_2_cor_matrix(long_df, is_square = TRUE)"},{"path":"/reference/long_df_2_cor_matrix.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"convert data.frame to matrix — long_df_2_cor_matrix","text":"long_df long data.frame is_square square 
matrix?","code":""},{"path":"/reference/long_df_2_cor_matrix.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"convert data.frame to matrix — long_df_2_cor_matrix","text":"matrix","code":""},{"path":"/reference/missing_dataset.html","id":null,"dir":"Reference","previous_headings":"","what":"Example Dataset With Missingness — missing_dataset","title":"Example Dataset With Missingness — missing_dataset","text":"example dataset missingness left-censorship","code":""},{"path":"/reference/missing_dataset.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Example Dataset With Missingness — missing_dataset","text":"","code":"missing_dataset"},{"path":[]},{"path":"/reference/missing_dataset.html","id":"missing-dataset","dir":"Reference","previous_headings":"","what":"missing_dataset","title":"Example Dataset With Missingness — missing_dataset","text":"matrix 1000 rows 20 columns, rows features columns samples.","code":""},{"path":"/reference/missing_dataset.html","id":"source","dir":"Reference","previous_headings":"","what":"Source","title":"Example Dataset With Missingness — missing_dataset","text":"Robert M Flight","code":""},{"path":"/reference/pairwise_completeness.html","id":null,"dir":"Reference","previous_headings":"","what":"pairwise completeness — pairwise_completeness","title":"pairwise completeness — pairwise_completeness","text":"Calculates completeness two samples using \"\", entry missing either X \"\" Y.","code":""},{"path":"/reference/pairwise_completeness.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"pairwise completeness — pairwise_completeness","text":"","code":"pairwise_completeness( data_matrix, global_na = c(NA, Inf, 0), include_only = NULL, return_matrix = TRUE )"},{"path":"/reference/pairwise_completeness.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"pairwise completeness — 
pairwise_completeness","text":"data_matrix samples columns, features rows global_na globally, treated NA? include_only certain comparisons ? return_matrix matrix data.frame returned?","code":""},{"path":"/reference/pairwise_completeness.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"pairwise completeness — pairwise_completeness","text":"matrix degree completeness","code":""},{"path":[]},{"path":"/reference/show_progress.html","id":null,"dir":"Reference","previous_headings":"","what":"turn progress on off — show_progress","title":"turn progress on off — show_progress","text":"Allow user turn progress messages console . Default provide messages console.","code":""},{"path":"/reference/show_progress.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"turn progress on off — show_progress","text":"","code":"show_progress(progress = TRUE)"},{"path":"/reference/show_progress.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"turn progress on off — show_progress","text":"progress logical ","code":""},{"path":"/reference/test_left_censorship.html","id":null,"dir":"Reference","previous_headings":"","what":"Test for left censorship — test_left_censorship","title":"Test for left censorship — test_left_censorship","text":"binomial test check likely cause missing values due values limit detection, coming left-censored distribution.","code":""},{"path":"/reference/test_left_censorship.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Test for left censorship — test_left_censorship","text":"","code":"test_left_censorship(in_data, sample_classes = NULL, global_na = c(NA, Inf, 0))"},{"path":"/reference/test_left_censorship.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Test for left censorship — test_left_censorship","text":"in_data matrix data.frame numeric data sample_classes samples 
class global_na represents zero missing?","code":""},{"path":"/reference/test_left_censorship.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Test for left censorship — test_left_censorship","text":"data.frame trials / successes, binom.test result","code":""},{"path":"/reference/test_left_censorship.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Test for left censorship — test_left_censorship","text":"feature missing group samples, save possibility test. sample, calculate median value missing values removed. feature missing value, test whether remaining non-missing values sample median samples feature non-missing. binomial test considers total number features instances (minus missing values) number trials, number features sample medians number successes. bit detail vignette: vignette(\"testing--left-censorship\", package = \"ICIKendallTau\")","code":""},{"path":[]},{"path":"/reference/test_left_censorship.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Test for left censorship — test_left_censorship","text":"","code":"# this example has 80% missing due to left-censorship data(missing_dataset) missingness = test_left_censorship(missing_dataset) missingness$values #> trials success class #> 1 1900 1520 A missingness$binomial_test #> #> \tExact binomial test #> #> data: total_success and total_trials #> number of successes = 1520, number of trials = 1900, p-value < 2.2e-16 #> alternative hypothesis: true probability of success is greater than 0.5 #> 95 percent confidence interval: #> 0.7843033 1.0000000 #> sample estimates: #> probability of success #> 0.8 #>"},{"path":"/news/index.html","id":"icikendalltau-110","dir":"Changelog","previous_headings":"","what":"ICIKendallTau 1.1.0","title":"ICIKendallTau 1.1.0","text":"adds function test_left_censorship verify ici_kendalltau appropriate use data 
.","code":""},{"path":"/news/index.html","id":"icikendalltau-100","dir":"Changelog","previous_headings":"","what":"ICIKendallTau 1.0.0","title":"ICIKendallTau 1.0.0","text":"Calculates correlation columns matrix, rows.","code":""},{"path":"/news/index.html","id":"icikendalltau-0320","dir":"Changelog","previous_headings":"","what":"ICIKendallTau 0.3.20","title":"ICIKendallTau 0.3.20","text":"kt_fast now uses data.frame format output ici_kendalltau, returns matrix default. data.frame useful large amounts comparisons run.","code":""},{"path":"/news/index.html","id":"icikendalltau-030","dir":"Changelog","previous_headings":"","what":"ICIKendallTau 0.3.0","title":"ICIKendallTau 0.3.0","text":"Handling parallel execution differently avoid really large matrix issues core. Introduces return_matrix parameter ici_kendalltau allows return results form data.frame instead lists matrices.","code":""},{"path":"/news/index.html","id":"icikendalltau-0210","dir":"Changelog","previous_headings":"","what":"ICIKendallTau 0.2.10","title":"ICIKendallTau 0.2.10","text":"Provide kt_fast function handles missing NA values similarly stats::cor function, uses ici_kt fast function underneath.","code":""},{"path":"/news/index.html","id":"icikendalltau-028","dir":"Changelog","previous_headings":"","what":"ICIKendalltau 0.2.8","title":"ICIKendalltau 0.2.8","text":"Made error outputs length default output containing NA. Check case one variables ici_kt identical arguments, warns user returns NA.","code":""},{"path":"/news/index.html","id":"icikendalltau-021","dir":"Changelog","previous_headings":"","what":"ICIKendallTau 0.2.1","title":"ICIKendallTau 0.2.1","text":"Added new argument include_only ici_kendalltau allows user define pairwise correlations actually .","code":""},{"path":"/news/index.html","id":"icikendalltau-0117","dir":"Changelog","previous_headings":"","what":"ICIKendallTau 0.1.17","title":"ICIKendallTau 0.1.17","text":"switched theme. 
updated installation instructions use r-universe.","code":""},{"path":"/news/index.html","id":"icikendalltau-0116","dir":"Changelog","previous_headings":"","what":"ICIKendallTau 0.1.16","title":"ICIKendallTau 0.1.16","text":"updating documentation examples","code":""},{"path":"/news/index.html","id":"icikendalltau-014","dir":"Changelog","previous_headings":"","what":"ICIKendallTau 0.1.4","title":"ICIKendallTau 0.1.4","text":"Switched pairwise_completeness also use global_na parameter. Oh yes, function pairwise_completeness enable scaling completeness two samples.","code":""},{"path":"/news/index.html","id":"icikendalltau-012","dir":"Changelog","previous_headings":"","what":"ICIKendallTau 0.1.2","title":"ICIKendallTau 0.1.2","text":"Updated API use single variable, global_na defines values set NA correlation calculation. big API change, bumped version 0.1.","code":""},{"path":"/news/index.html","id":"icikendalltau-006","dir":"Changelog","previous_headings":"","what":"ICIKendallTau 0.0.6","title":"ICIKendallTau 0.0.6","text":"Fixing bug 55,000 elements vector, match overflows 32 bit 64 bit, results make sense.","code":""},{"path":"/news/index.html","id":"icikendalltau-005","dir":"Changelog","previous_headings":"","what":"ICIKendallTau 0.0.5","title":"ICIKendallTau 0.0.5","text":"Fixed bug instead returning two element vector, returned zero length value. probably happened one entries contained NA values, tried pass “vectors” less two entries.","code":""},{"path":"/news/index.html","id":"icikendalltau-004","dir":"Changelog","previous_headings":"","what":"ICIKendallTau 0.0.4","title":"ICIKendallTau 0.0.4","text":"Fixing things came R CMD check documentation. good now (hope).","code":""},{"path":"/news/index.html","id":"icikendalltau-003","dir":"Changelog","previous_headings":"","what":"ICIKendallTau 0.0.3","title":"ICIKendallTau 0.0.3","text":"Added tests! 
Removed bunch code wasn’t necessary using incorrect formula’s, correct version ici_kt_pairs function reference purposes.","code":""},{"path":"/news/index.html","id":"icikendalltau-002","dir":"Changelog","previous_headings":"","what":"ICIKendallTau 0.0.2","title":"ICIKendallTau 0.0.2","text":"Fixed issue warning issued {furrr} installed.","code":""},{"path":"/news/index.html","id":"icikendalltau-001","dir":"Changelog","previous_headings":"","what":"ICIKendallTau 0.0.1","title":"ICIKendallTau 0.0.1","text":"First release. ’s tested used rather thoroughly , ’d say ’s still rather alpha. Even though use time . Added NEWS.md file track changes package.","code":""}] +[{"path":[]},{"path":"/CODE_OF_CONDUCT.html","id":"our-pledge","dir":"","previous_headings":"","what":"Our Pledge","title":"Contributor Covenant Code of Conduct","text":"members, contributors, leaders pledge make participation community harassment-free experience everyone, regardless age, body size, visible invisible disability, ethnicity, sex characteristics, gender identity expression, level experience, education, socio-economic status, nationality, personal appearance, race, religion, sexual identity orientation. 
pledge act interact ways contribute open, welcoming, diverse, inclusive, healthy community.","code":""},{"path":"/CODE_OF_CONDUCT.html","id":"our-standards","dir":"","previous_headings":"","what":"Our Standards","title":"Contributor Covenant Code of Conduct","text":"Examples behavior contributes positive environment community include: Demonstrating empathy kindness toward people respectful differing opinions, viewpoints, experiences Giving gracefully accepting constructive feedback Accepting responsibility apologizing affected mistakes, learning experience Focusing best just us individuals, overall community Examples unacceptable behavior include: use sexualized language imagery, sexual attention advances kind Trolling, insulting derogatory comments, personal political attacks Public private harassment Publishing others’ private information, physical email address, without explicit permission conduct reasonably considered inappropriate professional setting","code":""},{"path":"/CODE_OF_CONDUCT.html","id":"enforcement-responsibilities","dir":"","previous_headings":"","what":"Enforcement Responsibilities","title":"Contributor Covenant Code of Conduct","text":"Community leaders responsible clarifying enforcing standards acceptable behavior take appropriate fair corrective action response behavior deem inappropriate, threatening, offensive, harmful. Community leaders right responsibility remove, edit, reject comments, commits, code, wiki edits, issues, contributions aligned Code Conduct, communicate reasons moderation decisions appropriate.","code":""},{"path":"/CODE_OF_CONDUCT.html","id":"scope","dir":"","previous_headings":"","what":"Scope","title":"Contributor Covenant Code of Conduct","text":"Code Conduct applies within community spaces, also applies individual officially representing community public spaces. 
Examples representing community include using official e-mail address, posting via official social media account, acting appointed representative online offline event.","code":""},{"path":"/CODE_OF_CONDUCT.html","id":"enforcement","dir":"","previous_headings":"","what":"Enforcement","title":"Contributor Covenant Code of Conduct","text":"Instances abusive, harassing, otherwise unacceptable behavior may reported community leaders responsible enforcement rflight79@gmail.com. complaints reviewed investigated promptly fairly. community leaders obligated respect privacy security reporter incident.","code":""},{"path":"/CODE_OF_CONDUCT.html","id":"enforcement-guidelines","dir":"","previous_headings":"","what":"Enforcement Guidelines","title":"Contributor Covenant Code of Conduct","text":"Community leaders follow Community Impact Guidelines determining consequences action deem violation Code Conduct:","code":""},{"path":"/CODE_OF_CONDUCT.html","id":"id_1-correction","dir":"","previous_headings":"Enforcement Guidelines","what":"1. Correction","title":"Contributor Covenant Code of Conduct","text":"Community Impact: Use inappropriate language behavior deemed unprofessional unwelcome community. Consequence: private, written warning community leaders, providing clarity around nature violation explanation behavior inappropriate. public apology may requested.","code":""},{"path":"/CODE_OF_CONDUCT.html","id":"id_2-warning","dir":"","previous_headings":"Enforcement Guidelines","what":"2. Warning","title":"Contributor Covenant Code of Conduct","text":"Community Impact: violation single incident series actions. Consequence: warning consequences continued behavior. interaction people involved, including unsolicited interaction enforcing Code Conduct, specified period time. includes avoiding interactions community spaces well external channels like social media. 
Violating terms may lead temporary permanent ban.","code":""},{"path":"/CODE_OF_CONDUCT.html","id":"id_3-temporary-ban","dir":"","previous_headings":"Enforcement Guidelines","what":"3. Temporary Ban","title":"Contributor Covenant Code of Conduct","text":"Community Impact: serious violation community standards, including sustained inappropriate behavior. Consequence: temporary ban sort interaction public communication community specified period time. public private interaction people involved, including unsolicited interaction enforcing Code Conduct, allowed period. Violating terms may lead permanent ban.","code":""},{"path":"/CODE_OF_CONDUCT.html","id":"id_4-permanent-ban","dir":"","previous_headings":"Enforcement Guidelines","what":"4. Permanent Ban","title":"Contributor Covenant Code of Conduct","text":"Community Impact: Demonstrating pattern violation community standards, including sustained inappropriate behavior, harassment individual, aggression toward disparagement classes individuals. Consequence: permanent ban sort public interaction within community.","code":""},{"path":"/CODE_OF_CONDUCT.html","id":"attribution","dir":"","previous_headings":"","what":"Attribution","title":"Contributor Covenant Code of Conduct","text":"Code Conduct adapted Contributor Covenant, version 2.0, available https://www.contributor-covenant.org/version/2/0/ code_of_conduct.html. Community Impact Guidelines inspired Mozilla’s code conduct enforcement ladder. answers common questions code conduct, see FAQ https://www.contributor-covenant.org/faq. 
Translations available https:// www.contributor-covenant.org/translations.","code":""},{"path":"/LICENSE.html","id":null,"dir":"","previous_headings":"","what":"MIT License","title":"MIT License","text":"Copyright (c) Robert M Flight & Hunter NB Moseley Permission hereby granted, free charge, person obtaining copy software associated documentation files (“Software”), deal Software without restriction, including without limitation rights use, copy, modify, merge, publish, distribute, sublicense, /sell copies Software, permit persons Software furnished , subject following conditions: copyright notice permission notice shall included copies substantial portions Software. SOFTWARE PROVIDED “”, WITHOUT WARRANTY KIND, EXPRESS IMPLIED, INCLUDING LIMITED WARRANTIES MERCHANTABILITY, FITNESS PARTICULAR PURPOSE NONINFRINGEMENT. EVENT SHALL AUTHORS COPYRIGHT HOLDERS LIABLE CLAIM, DAMAGES LIABILITY, WHETHER ACTION CONTRACT, TORT OTHERWISE, ARISING , CONNECTION SOFTWARE USE DEALINGS SOFTWARE.","code":""},{"path":"/articles/ici-kendalltau.html","id":"problem","dir":"Articles","previous_headings":"","what":"Problem","title":"Information-Content-Informed Kendall Tau Correlation","text":"handle missing data (.e. NA’s) calculating correlation two variables. However, whether observation made semi-quantitative information many analytical measurements sensitivity limits. .e. many cases, missing observations “missing--random”, “missing---random” due falling detection limit. cases, NA informative.","code":""},{"path":"/articles/ici-kendalltau.html","id":"approach","dir":"Articles","previous_headings":"","what":"Approach","title":"Information-Content-Informed Kendall Tau Correlation","text":"Kendall Tau Correlation coefficient calculates correlation based number concordant discordant pairs: \\(\\tau = \\frac{ | pairs_{concordant} | - | pairs_{discordant} |}{\\binom{n}{2}}\\) pair two x-y data points. 
\\(x_i > x_j\\) \\(y_i > y_j\\) \\(x_i < x_j\\) \\(y_i < y_j\\) \\(x_i > x_j\\) \\(y_i < y_j\\) \\(x_i < x_j\\) \\(y_i > y_j\\) definitions can expanded handle missing observations: \\(x_i > x_j\\) \\(y_i > y_j\\) \\(x_i < x_j\\) \\(y_i < y_j\\) \\(x_i > x_j\\) \\(y_i \\& !y_j\\) \\(x_i < x_j\\) \\(!y_i \\& y_j\\) \\(x_i \\& !x_j\\) \\(y_i > y_j\\) \\(!x_i \\& x_j\\) \\(y_i < y_j\\) \\(x_i \\& !x_j\\) \\(y_i \\& !y_j\\) (used local perspective version) \\(x_i \\& x_j\\) \\(!y_i \\& y_j\\) (used local perspective version) \\(x_i > x_j\\) \\(y_i < y_j\\) \\(x_i < x_j\\) \\(y_i > y_j\\) \\(x_i > x_j\\) \\(!y_i \\& y_j\\) \\(x_i < x_j\\) \\(y_i \\& !y_j\\) \\(x_i \\& !x_j\\) \\(y_i < y_j\\) \\(!x_i \\& x_j\\) \\(y_i > y_j\\) \\(x_i \\& !x_j\\) \\(!y_i \\& y_j\\) (used local perspective version) \\(!x_i \\& x_j\\) \\(y_i \\& !y_j\\) (used local perspective version) Also data points missing x y values naturally reduce strength correlation value, since can neither concordant discordant another (NA, NA) pair, impact denominator. equivalent removing last two concordant discordant pair tests.","code":""},{"path":"/articles/ici-kendalltau.html","id":"handling-tied-values","dir":"Articles","previous_headings":"","what":"Handling Tied Values","title":"Information-Content-Informed Kendall Tau Correlation","text":"base Kendall tau correlation must adjusted handle tied values, ie. tau-b version equation. \\[\\tau = \\frac{ | pairs_{concordant} | - | pairs_{discordant} |}{\\sqrt{ ( n_0 - n_{xtie} ) ( n_0 - n_{ytie} )}} \\] : \\(n_0 = \\frac{n \\left ( n - 1 \\right )}{2}\\) \\(n_{xtie} = \\sum_{}^{} \\frac{t_{xi} \\left ( t_{xi} - 1 \\right )}{2}\\) \\(n_{ytie} = \\sum_{}^{} \\frac{t_{yi} \\left ( t_{yi} - 1 \\right )}{2}\\) \\(t_{xi}\\) - size ith group tied x values. \\(t_{yi}\\) - size ith group tied y values. 
local perspective, number NAs x y can treated group tied values calculation \\(n_{xtie}\\) \\(n_{ytie}\\), respectively.","code":""},{"path":"/articles/ici-kendalltau.html","id":"scaling-by-the-correlation-with-the-highest-information-content","dir":"Articles","previous_headings":"","what":"Scaling by the correlation with the highest information content","title":"Information-Content-Informed Kendall Tau Correlation","text":"generating correlation matrix (heatmap) large analytical datasets, number observations common can become quite low two variables. becomes advantageous scale pair variables highest information content. One objective scaling factor highest possible absolute correlation maximum information content observed across dataset, dividing maximum possible absolute correlation scale whole dataset appropriately. \\[maxcorr = \\frac{\\binom{n-m}{2} + n * m}{\\binom{n-m}{2} + n * m + \\binom{m}{2}}\\] : Choose two variables least number missing values across dataset. n length variables. formula based perfect correlation given number (NA,NA) pairs added.","code":""},{"path":"/articles/ici-kendalltau.html","id":"functions","dir":"Articles","previous_headings":"","what":"Functions","title":"Information-Content-Informed Kendall Tau Correlation","text":"functions implement include: option perspective control NA values influence ties. comparing multiple samples, likely want use perspective = \"global\". Rows features, columns samples. library(furrr) plan(multiprocess) Otherwise use single core. ’ve also included function testing missingness data comes left-censorship, test_left_censorship. walk creating example data testing vignette Testing Left Censorship.","code":""},{"path":"/articles/ici-kendalltau.html","id":"implementation","dir":"Articles","previous_headings":"","what":"Implementation","title":"Information-Content-Informed Kendall Tau Correlation","text":"turns , think really hard, truly necessary replace missing values vector value smaller minimum value one. 
local version, first remove common missing values vector. C++ implementation explicitly can speed, instead wrapping {stats::cor} function. also use double merge-sort algorithm, translating {scipy:: ::stats::kendalltau} function C++ using {Rcpp}.","code":""},{"path":"/articles/ici-kendalltau.html","id":"speed","dir":"Articles","previous_headings":"","what":"Speed","title":"Information-Content-Informed Kendall Tau Correlation","text":"","code":"x = rnorm(1000) y = rnorm(1000) library(microbenchmark) microbenchmark( cor(x, y, method = \"kendall\"), ici_kt(x, y, \"global\"), times = 5 ) #> Unit: microseconds #> expr min lq mean median #> cor(x, y, method = \"kendall\") 12613.54 13228.435 13407.3404 13229.615 #> ici_kt(x, y, \"global\") 268.12 296.982 333.6532 300.057 #> uq max neval #> 13589.473 14375.637 5 #> 303.269 499.838 5 all.equal(ici_kt(x, y, \"global\")[[1]], cor(x, y, method = \"kendall\")) #> [1] TRUE"},{"path":"/articles/ici-kendalltau.html","id":"running-many","dir":"Articles","previous_headings":"","what":"Running Many","title":"Information-Content-Informed Kendall Tau Correlation","text":"Just like R’s cor function, can also calculate correlations many variables. 
Let’s make fake data try .","code":"set.seed(1234) s1 = sort(rnorm(1000, mean = 100, sd = 10)) s2 = s1 + 10 s2[sample(length(s1), 100)] = s1[1:100] s3 = s1 s3[c(1:10, sample(length(s1), 5))] = NA matrix_1 = cbind(s1, s2, s3) r_1 = ici_kendalltau(matrix_1) r_1$cor #> s1 s2 s3 #> s1 1.0000000 0.8049209 0.9907488 #> s2 0.8049209 1.0000000 0.7956652 #> s3 0.9907488 0.7956652 0.9850000"},{"path":"/articles/ici-kendalltau.html","id":"parallelism","dir":"Articles","previous_headings":"","what":"Parallelism","title":"Information-Content-Informed Kendall Tau Correlation","text":"{future} {furrr} packages installed, also possible split set matrix comparisons across compute resources multiprocessing engine registered {future}.","code":"library(furrr) future::plan(multicore, workers = 4) r_2 = ici_kendalltau(matrix_1)"},{"path":"/articles/ici-kendalltau.html","id":"many-many-comparisons","dir":"Articles","previous_headings":"","what":"Many Many Comparisons","title":"Information-Content-Informed Kendall Tau Correlation","text":"case hundreds thousands comparisons done, result matrices can become , large, require lots memory storage. also inefficient, lower upper triangular components stored. alternative storage format data.frame, single row comparison performed. actually results stored internally, converted matrix form requested (default). 
keep data.frame output, add argument return_matrix=FALSE call ici_kendalltau.","code":"r_3 = ici_kendalltau(matrix_1, return_matrix = FALSE) r_3$cor #> s1 s2 core raw pvalue taumax cor #> 1 s1 s2 1 0.8049209 0 1.0000000 0.8049209 #> 2 s1 s3 1 0.9907488 0 0.9998949 0.9907488 #> 3 s2 s3 1 0.7956652 0 0.9998949 0.7956652 #> 4 s1 s1 0 1.0000000 0 1.0000000 1.0000000 #> 5 s2 s2 0 1.0000000 0 1.0000000 1.0000000 #> 6 s3 s3 0 0.9850000 0 1.0000000 0.9850000"},{"path":"/articles/ici-kendalltau.html","id":"logging","dir":"Articles","previous_headings":"","what":"Logging","title":"Information-Content-Informed Kendall Tau Correlation","text":"possible log steps done much memory used (Linux least) correlations calculated. can useful running large sets correlations making sure much memory isn’t used, example. enable logging, {logger} package must installed. log_file supplied, one created current date time. default, ici_kendalltau also shows progress messages, want turn , can :","code":"enable_logging() enable_logging(\"/tmp/my_ici_run.log\") show_progress(FALSE)"},{"path":"/articles/testing-for-left-censorship.html","id":"why","dir":"Articles","previous_headings":"","what":"Why?","title":"Testing for Left Censorship","text":"{ICIKendallTau} specific assumption, missing values largely due limit detection, result left-censorship. Therefore, used missing values left-censorship. Ideally, nice way test .","code":""},{"path":"/articles/testing-for-left-censorship.html","id":"strategy","dir":"Articles","previous_headings":"","what":"Strategy","title":"Testing for Left Censorship","text":"test , following (implemented test_left_censorship). feature missing one samples class samples, check non-missing entries sample’s median values. count entries sample medians successes, total number possible entries number trials binomial test, aggregating across features missing value. 
can perform one-tailed binomial test expectation successes greater 0.5.","code":""},{"path":"/articles/testing-for-left-censorship.html","id":"fake-data","dir":"Articles","previous_headings":"","what":"Fake Data","title":"Testing for Left Censorship","text":"start, need make fake data can evaluate test . make smallish dataset, 1000 features across 20 samples, 80% missing values due left-censored. sort initial data know can easily put missingness due left-censoring. also use log-normal distribution initially, just .","code":"set.seed(1234) n_feature = 1000 n_sample = 20 n_miss = 100 n_low = 80 test_dataset = rlnorm(n_feature, 10, 1) test_dataset = sort(test_dataset) noisy_dataset = add_uniform_noise(log(test_dataset), n_sample, 0.1) sample_medians = calculate_matrix_medians(noisy_dataset)"},{"path":"/articles/testing-for-left-censorship.html","id":"add-missingness","dir":"Articles","previous_headings":"","what":"Add Missingness","title":"Testing for Left Censorship","text":"Now can sample low high indices add missingness, well samples add . can actually visualize missingness using visdat package. 
Missing entries test dataset.","code":"low_indices = sample(seq_len(300), n_low) hi_indices = sample(seq(800, 1000), n_miss - n_low) all_indices = c(low_indices, hi_indices) sample_indices = sample(n_sample, n_miss, replace = TRUE) missing_dataset = noisy_dataset for (i_loc in seq_along(all_indices)) { missing_dataset[all_indices[i_loc], sample_indices[i_loc]] = NA } visdat::vis_miss(as.data.frame(missing_dataset))"},{"path":"/articles/testing-for-left-censorship.html","id":"test","dir":"Articles","previous_headings":"","what":"Test","title":"Testing for Left Censorship","text":"Now can actually test missingness entries.","code":"missing_test = test_left_censorship(missing_dataset) missing_test #> $values #> trials success class #> 1 1900 1520 A #> #> $binomial_test #> #> Exact binomial test #> #> data: total_success and total_trials #> number of successes = 1520, number of trials = 1900, p-value < 2.2e-16 #> alternative hypothesis: true probability of success is greater than 0.5 #> 95 percent confidence interval: #> 0.7843033 1.0000000 #> sample estimates: #> probability of success #> 0.8"},{"path":"/articles/testing-for-left-censorship.html","id":"rna-seq-example","dir":"Articles","previous_headings":"","what":"RNA-Seq Example","title":"Testing for Left Censorship","text":"Gierliński et al (link) really neat experiment yeast, large number replicates, 48 WT 48 SNF2 deletion mutant. ’ve included summarized experiment part package, use demonstrate left-censorship real data context. addition testing, can re-order features based median ranking across samples. Missingness yeast data set, ordering. Missingness yeast data set, ordering features rank. 
Feature rank function number missing values.","code":"data(yeast_missing) yeast_classes = rep(c(\"snf2\", \"wt\"), each = 48) yeast_stats = test_left_censorship(yeast_missing, sample_classes = yeast_classes) yeast_stats #> $values #> trials success class #> 1 18336 18336 snf2 #> 2 20424 20424 wt #> #> $binomial_test #> #> Exact binomial test #> #> data: total_success and total_trials #> number of successes = 38760, number of trials = 38760, p-value < #> 2.2e-16 #> alternative hypothesis: true probability of success is greater than 0.5 #> 95 percent confidence interval: #> 0.9999227 1.0000000 #> sample estimates: #> probability of success #> 1 yeast_order = rank_order_data(yeast_missing) visdat::vis_miss(as.data.frame(yeast_order$original)) visdat::vis_miss(as.data.frame(yeast_order$ordered)) yeast_order$n_na_rank |> ggplot(aes(x = n_na, y = median_rank)) + geom_point()"},{"path":"/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Robert M Flight. Author, maintainer. Hunter NB Moseley. Author.","code":""},{"path":"/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Flight RM, Moseley HN (2024). ICIKendallTau: Calculates information-content-informed Kendall-tau. 
R package version 1.1.2, https://moseleybioinformaticslab.github.io/ICIKendallTau/https://github.com/moseleybioinformaticslab/ICIKendallTau/.","code":"@Manual{, title = {ICIKendallTau: Calculates information-content-informed Kendall-tau}, author = {Robert M Flight and Hunter NB Moseley}, year = {2024}, note = {R package version 1.1.2}, url = {https://moseleybioinformaticslab.github.io/ICIKendallTau/ https://github.com/moseleybioinformaticslab/ICIKendallTau/}, }"},{"path":"/index.html","id":"icikendalltau","dir":"","previous_headings":"","what":"Calculates information-content-informed Kendall-tau","title":"Calculates information-content-informed Kendall-tau","text":"can see pkgdown site .","code":""},{"path":"/index.html","id":"installation","dir":"","previous_headings":"","what":"Installation","title":"Calculates information-content-informed Kendall-tau","text":"can install current version ICIKendallTau via GitHub: can also install Windows Mac binaries using r-universe:","code":"remotes::install_github(\"MoseleyBioinformaticsLab/ICIKendallTau\") options(repos = c( moseleybioinformaticslab = 'https://moseleybioinformaticslab.r-universe.dev', CRAN = \"https://cloud.r-project.org\")) install.packages(\"ICIKendallTau\")"},{"path":"/index.html","id":"problem","dir":"","previous_headings":"","what":"Problem","title":"Calculates information-content-informed Kendall-tau","text":"handle missing data (.e. NA’s) calculating correlation two variables. However, whether observation present missing semi-quantitative information many analytical measurements sensitivity limits. .e. many cases, missing observations “missing--random”, “missing---random” due falling detection limit. cases, NA informative. Therefore, analytical measurements (gene expression, proteomics, metabolomics), missing measurements included, contribute correlation. 
want read solve problem, see package vignette.","code":""},{"path":"/index.html","id":"package-functions","dir":"","previous_headings":"","what":"Package Functions","title":"Calculates information-content-informed Kendall-tau","text":"functions implement include: option perspective control NA values influence ties. comparing samples, likely want use perspective = \"global\". Rows features, columns samples. library(furrr) plan(multiprocess) Otherwise use single core. ’ve also included function testing missingness data comes left-censorship, test_left_censorship. walk creating example data testing vignette Testing Left Censorship. addition testing, can also visualize missing data pattern feature rank using rank_order_data function, use visdat::vis_miss() original reordered missing data.","code":""},{"path":"/index.html","id":"examples","dir":"","previous_headings":"","what":"Examples","title":"Calculates information-content-informed Kendall-tau","text":"common case large matrix independent samples (columns) measured features samples (.e. gene expression). make artificial data show correlation changes introduce missingness. Now introduce missing values low end one. just simplest thing introduce NA values bottom set.","code":"set.seed(1234) library(ICIKendallTau) s1 = sort(rnorm(1000, mean = 100, sd = 10)) s2 = s1 + 10 matrix_1 = cbind(s1, s2) r_1 = ici_kendalltau(matrix_1) r_1$cor #> s1 s2 #> s1 1 1 #> s2 1 1 s3 = s1 s3[sample(100, 50)] = NA s4 = s2 s4[sample(100, 50)] = NA matrix_2 = cbind(s3, s4) r_2 = ici_kendalltau(matrix_2) r_2$cor #> s3 s4 #> s3 1.0000000 0.9944616 #> s4 0.9944616 1.0000000"},{"path":"/index.html","id":"is-it-fast","dir":"","previous_headings":"","what":"Is It Fast?","title":"Calculates information-content-informed Kendall-tau","text":"C++ code implementation (thanks {Rcpp}!) based SciPy implementation, uses two merge sorts ranks vector, looks differences . fastest method know , complexity O(nlogn). 
naive way computing , explicitly examines pairs, complexity n^2. implementation compared {pcaPP::cov.fk} function, use {Rcpp} inefficient copying vectors makes 3X slower one. honestly isn’t bad. case 40,000 features, average time modern CPU 14 milliseconds. course, want use calculate Kendall-tau-b without incorporating missingness, can just fine well. also provide kt_fast function, want something treats NA values similarly stats::cor.","code":"library(microbenchmark) x = rnorm(1000) y = rnorm(1000) x2 = rnorm(40000) y2 = rnorm(40000) library(microbenchmark) microbenchmark( cor(x, y, method = \"kendall\"), ici_kt(x, y, \"global\"), ici_kt(x2, y2, \"global\"), times = 5 ) #> Unit: microseconds #> expr min lq mean median uq max neval #> cor(x, y, method = \"kendall\") 11666.371 11671.671 12405.0886 12084.801 13276.95 13325.649 5 #> ici_kt(x, y, \"global\") 253.826 255.717 430.9002 277.985 317.24 1049.733 5 #> ici_kt(x2, y2, \"global\") 13405.302 13731.770 15208.7108 14693.928 15415.66 18796.894 5 k_tau = ici_kt(x, y, \"global\") all.equal(k_tau[[1]] ,cor(x, y, method = \"kendall\")) #> [1] TRUE k_tau_fast = kt_fast(x, y) k_tau_fast #> tau pvalue #> -0.003411411 0.871672260"},{"path":"/index.html","id":"parallelism","dir":"","previous_headings":"","what":"Parallelism","title":"Calculates information-content-informed Kendall-tau","text":"{future} {furrr} packages installed, also possible split set matrix comparisons across compute resources multiprocessing engine registered {future}.","code":"library(furrr) future::plan(multicore, workers = 4) r_3 = ici_kendalltau(matrix_2)"},{"path":"/index.html","id":"many-many-comparisons","dir":"","previous_headings":"","what":"Many Many Comparisons","title":"Calculates information-content-informed Kendall-tau","text":"case hundreds thousands comparisons done, result matrices can become , large, require lots memory storage. also inefficient, lower upper triangular components stored. 
alternative storage format data.frame, single row comparison performed. actually results stored internally, converted matrix form requested (default). keep data.frame output, add argument return_matrix=FALSE call ici_kendalltau.","code":"r_4 = ici_kendalltau(matrix_2, return_matrix = FALSE) r_4 #> $cor #> s1 s2 core raw pvalue taumax cor #> 1 s3 s4 1 0.9924359 0 0.997963 0.9944616 #> 2 s3 s3 0 1.0000000 0 1.000000 1.0000000 #> 3 s4 s4 0 1.0000000 0 1.000000 1.0000000 #> #> $run_time #> [1] 0.01783729"},{"path":"/index.html","id":"code-of-conduct","dir":"","previous_headings":"","what":"Code of Conduct","title":"Calculates information-content-informed Kendall-tau","text":"Please note ICIKendallTau project released Contributor Code Conduct. contributing project, agree abide terms.","code":""},{"path":"/reference/add_uniform_noise.html","id":null,"dir":"Reference","previous_headings":"","what":"Add uniform noise — add_uniform_noise","title":"Add uniform noise — add_uniform_noise","text":"Adds uniform noise values, generating replicates noise added original.","code":""},{"path":"/reference/add_uniform_noise.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Add uniform noise — add_uniform_noise","text":"","code":"add_uniform_noise(value, n_rep, sd, use_zero = FALSE)"},{"path":"/reference/add_uniform_noise.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Add uniform noise — add_uniform_noise","text":"value single vector numeric values n_rep number replicates make (numeric). Default 1. 
sd standard deviation data use_zero logical, returned values around zero ?","code":""},{"path":"/reference/add_uniform_noise.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Add uniform noise — add_uniform_noise","text":"numeric matrix","code":""},{"path":"/reference/calculate_matrix_medians.html","id":null,"dir":"Reference","previous_headings":"","what":"Calculate matrix medians — calculate_matrix_medians","title":"Calculate matrix medians — calculate_matrix_medians","text":"Given matrix data, calculates median value column row.","code":""},{"path":"/reference/calculate_matrix_medians.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Calculate matrix medians — calculate_matrix_medians","text":"","code":"calculate_matrix_medians(in_matrix, use = \"col\", ...)"},{"path":"/reference/calculate_matrix_medians.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Calculate matrix medians — calculate_matrix_medians","text":"in_matrix numeric matrix values use character \"col\" \"row\" defining columns rows ... 
extra parameters median function","code":""},{"path":"/reference/calculate_matrix_medians.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Calculate matrix medians — calculate_matrix_medians","text":"numeric","code":""},{"path":"/reference/cor_matrix_2_long_df.html","id":null,"dir":"Reference","previous_headings":"","what":"convert matrix to data.frame — cor_matrix_2_long_df","title":"convert matrix to data.frame — cor_matrix_2_long_df","text":"Given square correlation matrix, converts long data.frame, three columns.","code":""},{"path":"/reference/cor_matrix_2_long_df.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"convert matrix to data.frame — cor_matrix_2_long_df","text":"","code":"cor_matrix_2_long_df(in_matrix)"},{"path":"/reference/cor_matrix_2_long_df.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"convert matrix to data.frame — cor_matrix_2_long_df","text":"in_matrix correlation matrix","code":""},{"path":"/reference/cor_matrix_2_long_df.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"convert matrix to data.frame — cor_matrix_2_long_df","text":"data.frame","code":""},{"path":"/reference/cor_matrix_2_long_df.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"convert matrix to data.frame — cor_matrix_2_long_df","text":"data.frame contains three columns: s1: first entry comparison s2: second entry comparison cor: correlation value","code":""},{"path":"/reference/disable_logging.html","id":null,"dir":"Reference","previous_headings":"","what":"turn logging off — disable_logging","title":"turn logging off — disable_logging","text":"may good reasons turn logging turned . 
basically tells package logger available.","code":""},{"path":"/reference/disable_logging.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"turn logging off — disable_logging","text":"","code":"disable_logging()"},{"path":"/reference/enable_logging.html","id":null,"dir":"Reference","previous_headings":"","what":"turn logging on — enable_logging","title":"turn logging on — enable_logging","text":"Choose enable logging, specific file desired.","code":""},{"path":"/reference/enable_logging.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"turn logging on — enable_logging","text":"","code":"enable_logging(log_file = NULL, memory = FALSE)"},{"path":"/reference/enable_logging.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"turn logging on — enable_logging","text":"log_file file log memory provide memory logging ? available Linux MacOS","code":""},{"path":"/reference/enable_logging.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"turn logging on — enable_logging","text":"Uses logger package hood, suggested dependencies. logging enabled nice see things starting stopping, exactly done, without needing write messages console. especially useful getting errors, really see , can add \"memory\" logging see running memory. 
Default log file pattern: YYYY.MM.DD.HH.MM.SS_ICIKendallTau_run.log","code":""},{"path":"/reference/ici_kendalltau.html","id":null,"dir":"Reference","previous_headings":"","what":"information-content-informed kendall tau — ici_kendalltau","title":"information-content-informed kendall tau — ici_kendalltau","text":"Given data-matrix, computes information-theoretic Kendall-tau-b samples.","code":""},{"path":"/reference/ici_kendalltau.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"information-content-informed kendall tau — ici_kendalltau","text":"","code":"ici_kendalltau( data_matrix, global_na = c(NA, Inf, 0), perspective = \"global\", scale_max = TRUE, diag_good = TRUE, include_only = NULL, check_timing = FALSE, return_matrix = TRUE )"},{"path":"/reference/ici_kendalltau.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"information-content-informed kendall tau — ici_kendalltau","text":"data_matrix matrix data.frame values, samples columns, features rows global_na numeric vector defines globally, treated NA? perspective treat missing data denominator ties, character scale_max logical, everything scaled compared maximum correlation? diag_good logical, diagonal entries reflect many entries sample \"good\"? include_only run correlations include members (vector) combinations (list data.frame) check_timing logical determine try estimate run time full dataset? 
(default FALSE) return_matrix logical, data.frame matrix result returned?","code":""},{"path":"/reference/ici_kendalltau.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"information-content-informed kendall tau — ici_kendalltau","text":"list cor, raw, pval, taumax","code":""},{"path":"/reference/ici_kendalltau.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"information-content-informed kendall tau — ici_kendalltau","text":"details, see vignette vignette(\"ici-kendalltau\", package = \"ICIKendallTau\")) default global_na includes values data replace NA Kendall-tau calculation. default global_na = c(NA, Inf, 0). want replace something 0, example, might use global_na = c(NA, Inf, -2), values -2 replaced instead 0. check_timing = TRUE, 5 random pairwise comparisons run generate timings single core, estimates long full set take calculated. data returned data.frame, low side, provide good idea long data take. Returned list containing matrices : cor: scaled correlations raw: raw kendall-tau correlations pval: p-values taumax: theoretical maximum kendall-tau value possible Eventually, plan provide two parameters replacing values, feature_na feature specific NA values sample_na sample specific NA values. 
want know missing values data possibly due left-censorship, recommend testing hypothesis test_left_censorship() first.","code":""},{"path":[]},{"path":"/reference/ici_kendalltau.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"information-content-informed kendall tau — ici_kendalltau","text":"","code":"if (FALSE) { # not run set.seed(1234) s1 = sort(rnorm(1000, mean = 100, sd = 10)) s2 = s1 + 10 matrix_1 = cbind(s1, s2) r_1 = ici_kendalltau(matrix_1) r_1$cor # s1 s2 # s1 1 1 # s2 1 1 names(r_1) # \"cor\", \"raw\", \"pval\", \"taumax\", \"keep\", \"run_time\" s3 = s1 s3[sample(100, 50)] = NA s4 = s2 s4[sample(100, 50)] = NA matrix_2 = cbind(s3, s4) r_2 = ici_kendalltau(matrix_2) r_2$cor # s3 s4 # s3 1.0000000 0.9944616 # s4 0.9944616 1.0000000 # using include_only set.seed(1234) x = t(matrix(rnorm(5000), nrow = 100, ncol = 50)) colnames(x) = paste0(\"s\", seq(1, nrow(x))) # only calculate correlations of other columns with \"s1\" include_s1 = \"s1\" s1_only = ici_kendalltau(x, include_only = include_s1) # include s1 and s3 things both include_s1s3 = c(\"s1\", \"s3\") s1s3_only = ici_kendalltau(x, include_only = include_s1s3) # only specify certain pairs either as a list include_pairs = list(g1 = \"s1\", g2 = c(\"s2\", \"s3\")) s1_other = ici_kendalltau(x, include_only = include_pairs) # or a data.frame include_df = as.data.frame(list(g1 = \"s1\", g2 = c(\"s2\", \"s3\"))) s1_df = ici_kendalltau(x, include_only = include_df) }"},{"path":"/reference/ici_kendalltau_ref.html","id":null,"dir":"Reference","previous_headings":"","what":"information-content-informed kendall tau — ici_kendalltau_ref","title":"information-content-informed kendall tau — ici_kendalltau_ref","text":"Given data-matrix, computes information-content-informed (ICI) Kendall-tau-b samples.","code":""},{"path":"/reference/ici_kendalltau_ref.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"information-content-informed 
kendall tau — ici_kendalltau_ref","text":"","code":"ici_kendalltau_ref( data_matrix, global_na = c(NA, Inf, 0), zero_value = 0, perspective = \"global\", scale_max = TRUE, diag_good = TRUE, progress = FALSE )"},{"path":"/reference/ici_kendalltau_ref.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"information-content-informed kendall tau — ici_kendalltau_ref","text":"data_matrix samples columns, features rows global_na values treated missing (NA)? zero_value actual zero value? perspective treat missing data denominator ties, see details scale_max everything scaled compared maximum correlation? diag_good diagonal entries reflect many entries sample \"good\"? progress progress displayed.","code":""},{"path":"/reference/ici_kendalltau_ref.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"information-content-informed kendall tau — ici_kendalltau_ref","text":"numeric","code":""},{"path":"/reference/ici_kendalltau_ref.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"information-content-informed kendall tau — ici_kendalltau_ref","text":"details, see ICI-Kendall-tau vignette: vignette(\"ici-kendalltau\", package = \"ICIKendallTau\")","code":""},{"path":"/reference/ici_kt.html","id":null,"dir":"Reference","previous_headings":"","what":"Calculates ici-kendall-tau — ici_kt","title":"Calculates ici-kendall-tau — ici_kt","text":"Calculates ici-kendall-tau","code":""},{"path":"/reference/ici_kt.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Calculates ici-kendall-tau — ici_kt","text":"","code":"ici_kt( x, y, perspective = \"local\", alternative = \"two.sided\", continuity = FALSE, output = \"simple\" )"},{"path":"/reference/ici_kt.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Calculates ici-kendall-tau — ici_kt","text":"x numeric vector y numeric vector perspective consider 
\"local\" \"global\" perspective? alternative alternative p-value test? continuity logical: true, continuity correction used output used control reporting values debugging","code":""},{"path":"/reference/ici_kt.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Calculates ici-kendall-tau — ici_kt","text":"kendall tau correlation, p-value, max-correlation","code":""},{"path":"/reference/ici_kt.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Calculates ici-kendall-tau — ici_kt","text":"Calculates information-content-informed Kendall-tau correlation measure. correlation based concordant discordant ranked pairs, like Kendall-tau, also includes missing values (NA). Missing values assumed primarily due lack detection due instrumental sensitivity, therefore encode information. details see ICI-Kendall-tau vignette: browseVignettes(\"ICIKendallTau\")","code":""},{"path":"/reference/ici_kt.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Calculates ici-kendall-tau — ici_kt","text":"","code":"x = sort(rnorm(100)) y = x + 1 y2 = y y2[1:10] = NA ici_kt(x, y) #> tau pvalue tau_max #> 1.000000e+00 3.480281e-49 1.000000e+00 ici_kt(x, y2, \"global\") #> tau pvalue tau_max #> 9.954442e-01 2.228308e-48 9.954442e-01 ici_kt(x, y2) #> tau pvalue tau_max #> 9.954442e-01 2.228308e-48 9.954442e-01"},{"path":"/reference/kt_fast.html","id":null,"dir":"Reference","previous_headings":"","what":"fast kendall tau — kt_fast","title":"fast kendall tau — kt_fast","text":"Uses underlying c++ implementation ici_kt provide fast version Kendall-tau correlation.","code":""},{"path":"/reference/kt_fast.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"fast kendall tau — kt_fast","text":"","code":"kt_fast(x, y = NULL, use = \"everything\", return_matrix = 
TRUE)"},{"path":"/reference/kt_fast.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"fast kendall tau — kt_fast","text":"x numeric vector, matrix, data frame. y NULL (default) vector. use optional character string giving method computing correlations presence missing values. must (abbreviation ) one strings \"everything\", \".obs\", \"complete.obs\", \"pairwise.complete.obs\". return_matrix matrices values returned, long data.frame","code":""},{"path":"/reference/kt_fast.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"fast kendall tau — kt_fast","text":"named vector list matrices.","code":""},{"path":"/reference/kt_fast.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"fast kendall tau — kt_fast","text":"Although interface mostly identical built-stats::cor() method, differences. providing x y, assumed single vectors. NA values present, function error, either remove return NA, depending option. \"na..complete\" valid option use. 
named vector named list matrices returned, tau pvalue values.","code":""},{"path":"/reference/log_memory.html","id":null,"dir":"Reference","previous_headings":"","what":"log memory usage — log_memory","title":"log memory usage — log_memory","text":"Logs amount memory used log file available, generating warnings amount RAM hits zero.","code":""},{"path":"/reference/log_memory.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"log memory usage — log_memory","text":"","code":"log_memory()"},{"path":"/reference/log_message.html","id":null,"dir":"Reference","previous_headings":"","what":"log messages — log_message","title":"log messages — log_message","text":"log_appender available, logs given message info level.","code":""},{"path":"/reference/log_message.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"log messages — log_message","text":"","code":"log_message(message_string)"},{"path":"/reference/log_message.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"log messages — log_message","text":"message_string string put message","code":""},{"path":"/reference/long_df_2_cor_matrix.html","id":null,"dir":"Reference","previous_headings":"","what":"convert data.frame to matrix — long_df_2_cor_matrix","title":"convert data.frame to matrix — long_df_2_cor_matrix","text":"Given long data.frame, converts possibly square correlation matrix","code":""},{"path":"/reference/long_df_2_cor_matrix.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"convert data.frame to matrix — long_df_2_cor_matrix","text":"","code":"long_df_2_cor_matrix(long_df, is_square = TRUE)"},{"path":"/reference/long_df_2_cor_matrix.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"convert data.frame to matrix — long_df_2_cor_matrix","text":"long_df long data.frame is_square square 
matrix?","code":""},{"path":"/reference/long_df_2_cor_matrix.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"convert data.frame to matrix — long_df_2_cor_matrix","text":"matrix","code":""},{"path":"/reference/missing_dataset.html","id":null,"dir":"Reference","previous_headings":"","what":"Example Dataset With Missingness — missing_dataset","title":"Example Dataset With Missingness — missing_dataset","text":"example dataset missingness left-censorship","code":""},{"path":"/reference/missing_dataset.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Example Dataset With Missingness — missing_dataset","text":"","code":"missing_dataset"},{"path":[]},{"path":"/reference/missing_dataset.html","id":"missing-dataset","dir":"Reference","previous_headings":"","what":"missing_dataset","title":"Example Dataset With Missingness — missing_dataset","text":"matrix 1000 rows 20 columns, rows features columns samples.","code":""},{"path":"/reference/missing_dataset.html","id":"source","dir":"Reference","previous_headings":"","what":"Source","title":"Example Dataset With Missingness — missing_dataset","text":"Robert M Flight","code":""},{"path":"/reference/pairwise_completeness.html","id":null,"dir":"Reference","previous_headings":"","what":"pairwise completeness — pairwise_completeness","title":"pairwise completeness — pairwise_completeness","text":"Calculates completeness two samples using \"\", entry missing either X \"\" Y.","code":""},{"path":"/reference/pairwise_completeness.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"pairwise completeness — pairwise_completeness","text":"","code":"pairwise_completeness( data_matrix, global_na = c(NA, Inf, 0), include_only = NULL, return_matrix = TRUE )"},{"path":"/reference/pairwise_completeness.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"pairwise completeness — 
pairwise_completeness","text":"data_matrix samples columns, features rows global_na globally, treated NA? include_only certain comparisons ? return_matrix matrix data.frame returned?","code":""},{"path":"/reference/pairwise_completeness.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"pairwise completeness — pairwise_completeness","text":"matrix degree completeness","code":""},{"path":[]},{"path":"/reference/rank_order_data.html","id":null,"dir":"Reference","previous_headings":"","what":"Rank order row data — rank_order_data","title":"Rank order row data — rank_order_data","text":"Given data-matrix numeric data, calculates rank row column (feature sample), gets median rank across columns, returns original data missing values set NA, reordered data, data.frame ranks feature number missing values.","code":""},{"path":"/reference/rank_order_data.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Rank order row data — rank_order_data","text":"","code":"rank_order_data(data_matrix, global_na = c(NA, Inf, 0))"},{"path":"/reference/rank_order_data.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Rank order row data — rank_order_data","text":"data_matrix matrix data.frame values global_na values consider missing","code":""},{"path":"/reference/rank_order_data.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Rank order row data — rank_order_data","text":"list two matrices data.frame","code":""},{"path":"/reference/show_progress.html","id":null,"dir":"Reference","previous_headings":"","what":"turn progress on off — show_progress","title":"turn progress on off — show_progress","text":"Allow user turn progress messages console . 
Default provide messages console.","code":""},{"path":"/reference/show_progress.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"turn progress on off — show_progress","text":"","code":"show_progress(progress = TRUE)"},{"path":"/reference/show_progress.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"turn progress on off — show_progress","text":"progress logical ","code":""},{"path":"/reference/test_left_censorship.html","id":null,"dir":"Reference","previous_headings":"","what":"Test for left censorship — test_left_censorship","title":"Test for left censorship — test_left_censorship","text":"binomial test check likely cause missing values due values limit detection, coming left-censored distribution.","code":""},{"path":"/reference/test_left_censorship.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Test for left censorship — test_left_censorship","text":"","code":"test_left_censorship( data_matrix, sample_classes = NULL, global_na = c(NA, Inf, 0) )"},{"path":"/reference/test_left_censorship.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Test for left censorship — test_left_censorship","text":"data_matrix matrix data.frame numeric data sample_classes samples class global_na represents zero missing?","code":""},{"path":"/reference/test_left_censorship.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Test for left censorship — test_left_censorship","text":"data.frame trials / successes, binom.test result","code":""},{"path":"/reference/test_left_censorship.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Test for left censorship — test_left_censorship","text":"feature missing group samples, save possibility test. sample, calculate median value missing values removed. 
feature missing value, test whether remaining non-missing values sample median samples feature non-missing. binomial test considers total number features instances (minus missing values) number trials, number features sample medians number successes. bit detail vignette: vignette(\"testing--left-censorship\", package = \"ICIKendallTau\")","code":""},{"path":[]},{"path":"/reference/test_left_censorship.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Test for left censorship — test_left_censorship","text":"","code":"# this example has 80% missing due to left-censorship data(missing_dataset) missingness = test_left_censorship(missing_dataset) missingness$values #> trials success class #> 1 1900 1520 A missingness$binomial_test #> #> \tExact binomial test #> #> data: total_success and total_trials #> number of successes = 1520, number of trials = 1900, p-value < 2.2e-16 #> alternative hypothesis: true probability of success is greater than 0.5 #> 95 percent confidence interval: #> 0.7843033 1.0000000 #> sample estimates: #> probability of success #> 0.8 #>"},{"path":"/reference/yeast_missing.html","id":null,"dir":"Reference","previous_headings":"","what":"Example RNA-Seq Dataset With Missingness — yeast_missing","title":"Example RNA-Seq Dataset With Missingness — yeast_missing","text":"example dataset RNA-seq experiment yeast, created Gierliński et al., \"Statistical models RNA-seq data derived two-condition 48-replicate experiment\", Bioinformatics, 31, 2015 https://doi.org/10.1093/bioinformatics/btv425.","code":""},{"path":"/reference/yeast_missing.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Example RNA-Seq Dataset With Missingness — yeast_missing","text":"","code":"yeast_missing"},{"path":[]},{"path":"/reference/yeast_missing.html","id":"yeast-missing","dir":"Reference","previous_headings":"","what":"yeast_missing","title":"Example RNA-Seq Dataset With Missingness — 
yeast_missing","text":"matrix 6887 rows (genes) 96 columns (samples).","code":""},{"path":"/reference/yeast_missing.html","id":"source","dir":"Reference","previous_headings":"","what":"Source","title":"Example RNA-Seq Dataset With Missingness — yeast_missing","text":"https://dx.doi.org/10.6084/M9.FIGSHARE.1425502.V1 https://dx.doi.org/10.6084/M9.FIGSHARE.1425503.V1","code":""},{"path":"/news/index.html","id":"icikendalltau-110","dir":"Changelog","previous_headings":"","what":"ICIKendallTau 1.1.0","title":"ICIKendallTau 1.1.0","text":"adds function test_left_censorship verify ici_kendalltau appropriate use data .","code":""},{"path":"/news/index.html","id":"icikendalltau-100","dir":"Changelog","previous_headings":"","what":"ICIKendallTau 1.0.0","title":"ICIKendallTau 1.0.0","text":"Calculates correlation columns matrix, rows.","code":""},{"path":"/news/index.html","id":"icikendalltau-0320","dir":"Changelog","previous_headings":"","what":"ICIKendallTau 0.3.20","title":"ICIKendallTau 0.3.20","text":"kt_fast now uses data.frame format output ici_kendalltau, returns matrix default. data.frame useful large amounts comparisons run.","code":""},{"path":"/news/index.html","id":"icikendalltau-030","dir":"Changelog","previous_headings":"","what":"ICIKendallTau 0.3.0","title":"ICIKendallTau 0.3.0","text":"Handling parallel execution differently avoid really large matrix issues core. 
Introduces return_matrix parameter ici_kendalltau allows return results form data.frame instead lists matrices.","code":""},{"path":"/news/index.html","id":"icikendalltau-0210","dir":"Changelog","previous_headings":"","what":"ICIKendallTau 0.2.10","title":"ICIKendallTau 0.2.10","text":"Provide kt_fast function handles missing NA values similarly stats::cor function, uses ici_kt fast function underneath.","code":""},{"path":"/news/index.html","id":"icikendalltau-028","dir":"Changelog","previous_headings":"","what":"ICIKendalltau 0.2.8","title":"ICIKendalltau 0.2.8","text":"Made error outputs length default output containing NA. Check case one variables ici_kt identical arguments, warns user returns NA.","code":""},{"path":"/news/index.html","id":"icikendalltau-021","dir":"Changelog","previous_headings":"","what":"ICIKendallTau 0.2.1","title":"ICIKendallTau 0.2.1","text":"Added new argument include_only ici_kendalltau allows user define pairwise correlations actually .","code":""},{"path":"/news/index.html","id":"icikendalltau-0117","dir":"Changelog","previous_headings":"","what":"ICIKendallTau 0.1.17","title":"ICIKendallTau 0.1.17","text":"switched theme. updated installation instructions use r-universe.","code":""},{"path":"/news/index.html","id":"icikendalltau-0116","dir":"Changelog","previous_headings":"","what":"ICIKendallTau 0.1.16","title":"ICIKendallTau 0.1.16","text":"updating documentation examples","code":""},{"path":"/news/index.html","id":"icikendalltau-014","dir":"Changelog","previous_headings":"","what":"ICIKendallTau 0.1.4","title":"ICIKendallTau 0.1.4","text":"Switched pairwise_completeness also use global_na parameter. 
Oh yes, function pairwise_completeness enable scaling completeness two samples.","code":""},{"path":"/news/index.html","id":"icikendalltau-012","dir":"Changelog","previous_headings":"","what":"ICIKendallTau 0.1.2","title":"ICIKendallTau 0.1.2","text":"Updated API use single variable, global_na defines values set NA correlation calculation. big API change, bumped version 0.1.","code":""},{"path":"/news/index.html","id":"icikendalltau-006","dir":"Changelog","previous_headings":"","what":"ICIKendallTau 0.0.6","title":"ICIKendallTau 0.0.6","text":"Fixing bug 55,000 elements vector, match overflows 32 bit 64 bit, results make sense.","code":""},{"path":"/news/index.html","id":"icikendalltau-005","dir":"Changelog","previous_headings":"","what":"ICIKendallTau 0.0.5","title":"ICIKendallTau 0.0.5","text":"Fixed bug instead returning two element vector, returned zero length value. probably happened one entries contained NA values, tried pass “vectors” less two entries.","code":""},{"path":"/news/index.html","id":"icikendalltau-004","dir":"Changelog","previous_headings":"","what":"ICIKendallTau 0.0.4","title":"ICIKendallTau 0.0.4","text":"Fixing things came R CMD check documentation. good now (hope).","code":""},{"path":"/news/index.html","id":"icikendalltau-003","dir":"Changelog","previous_headings":"","what":"ICIKendallTau 0.0.3","title":"ICIKendallTau 0.0.3","text":"Added tests! Removed bunch code wasn’t necessary using incorrect formula’s, correct version ici_kt_pairs function reference purposes.","code":""},{"path":"/news/index.html","id":"icikendalltau-002","dir":"Changelog","previous_headings":"","what":"ICIKendallTau 0.0.2","title":"ICIKendallTau 0.0.2","text":"Fixed issue warning issued {furrr} installed.","code":""},{"path":"/news/index.html","id":"icikendalltau-001","dir":"Changelog","previous_headings":"","what":"ICIKendallTau 0.0.1","title":"ICIKendallTau 0.0.1","text":"First release. ’s tested used rather thoroughly , ’d say ’s still rather alpha. 
Even though use time . Added NEWS.md file track changes package.","code":""}] diff --git a/docs/sitemap.xml b/docs/sitemap.xml index e9b2467..9d2d588 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -75,6 +75,9 @@ /reference/pairwise_completeness.html + + /reference/rank_order_data.html + /reference/ref_diff_kendallt.html @@ -87,4 +90,7 @@ /reference/test_left_censorship.html + + /reference/yeast_missing.html + diff --git a/man/rank_order_data.Rd b/man/rank_order_data.Rd new file mode 100644 index 0000000..e03c8cb --- /dev/null +++ b/man/rank_order_data.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rank-ordering.R +\name{rank_order_data} +\alias{rank_order_data} +\title{Rank order row data} +\usage{ +rank_order_data(data_matrix, global_na = c(NA, Inf, 0)) +} +\arguments{ +\item{data_matrix}{matrix or data.frame of values} + +\item{global_na}{the values to consider as missing} +} +\value{ +list with two matrices and a data.frame +} +\description{ +Given a data-matrix of numeric data, calculates the rank of each row in each +column (feature in sample), gets the median rank across all columns, and +returns the original data with missing values set to NA, the reordered data, +and a data.frame of the ranks of each feature and the number of missing values. 
+} diff --git a/man/test_left_censorship.Rd b/man/test_left_censorship.Rd index d6d87da..4a0341d 100644 --- a/man/test_left_censorship.Rd +++ b/man/test_left_censorship.Rd @@ -4,10 +4,14 @@ \alias{test_left_censorship} \title{Test for left censorship} \usage{ -test_left_censorship(in_data, sample_classes = NULL, global_na = c(NA, Inf, 0)) +test_left_censorship( + data_matrix, + sample_classes = NULL, + global_na = c(NA, Inf, 0) +) } \arguments{ -\item{in_data}{matrix or data.frame of numeric data} +\item{data_matrix}{matrix or data.frame of numeric data} \item{sample_classes}{which samples are in which class} diff --git a/tests/testthat/test-left_censorship.R b/tests/testthat/test-left_censorship.R index ca6782c..3cb26b7 100644 --- a/tests/testthat/test-left_censorship.R +++ b/tests/testthat/test-left_censorship.R @@ -43,4 +43,14 @@ test_that("test_left_censorship works", { mix_dataset[back_zero] = 0 mix_binom = test_left_censorship(mix_dataset, global_na = c(0, NA)) expect_equal(na_binom, mix_binom) + + single_group_indices = withr::with_seed(1234, sample(n_sample / 2, n_miss, replace = TRUE)) + group_classes = rep(c("A", "B"), each = 10) + group_dataset = noisy_dataset + for (i_loc in seq_along(all_indices)) { + group_dataset[all_indices[i_loc], single_group_indices[i_loc]] = 0 + } + group_binom = test_left_censorship(group_dataset, sample_classes = group_classes) + expect_equal(nrow(group_binom$values), 2) + expect_equal(group_binom$values$success[1] / group_binom$values$trials[1], n_low / n_miss) }) diff --git a/vignettes/testing-for-left-censorship.Rmd b/vignettes/testing-for-left-censorship.Rmd index 67557d9..aacad1a 100644 --- a/vignettes/testing-for-left-censorship.Rmd +++ b/vignettes/testing-for-left-censorship.Rmd @@ -6,11 +6,12 @@ vignette: > %\VignetteIndexEntry{Testing for Left Censorship} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} - \usepackage[utf8]{naniar} + \usepackage[utf8]{visdat, ggplot2} --- ```{r, include = FALSE} 
knitr::opts_chunk$set( + message = FALSE, collapse = TRUE, comment = "#>" ) @@ -18,6 +19,7 @@ knitr::opts_chunk$set( ```{r setup} library(ICIKendallTau) +library(ggplot2) ``` ## Why? @@ -73,11 +75,12 @@ for (i_loc in seq_along(all_indices)) { } ``` -We can actually visualize the missingness using the `naniar` package. +We can actually visualize the missingness using the `visdat` package. ```{r} #| label: examine-missingness -naniar::vis_miss(as.data.frame(missing_dataset)) +#| fig-cap: Missing entries in the test dataset. +visdat::vis_miss(as.data.frame(missing_dataset)) ``` ## Test @@ -89,3 +92,47 @@ Now we can actually test the missingness entries. missing_test = test_left_censorship(missing_dataset) missing_test ``` + +## RNA-Seq Example + +Gierliński et al. ([link](https://doi.org/10.1093/bioinformatics/btv425)) did a really neat experiment in yeast, with a large number of replicates, 48 WT and 48 SNF2 deletion mutants. +We've included a summarized experiment as part of this package, and use it to demonstrate left-censorship in a real data context. + +```{r} +#| label: load-yeast +data(yeast_missing) +yeast_classes = rep(c("snf2", "wt"), each = 48) +``` + +```{r} +#| label: test-yeast +yeast_stats = test_left_censorship(yeast_missing, sample_classes = yeast_classes) +yeast_stats +``` + +In addition to testing, we can re-order the features based on their median ranking across samples. + +```{r} +#| label: order-yeast +yeast_order = rank_order_data(yeast_missing) +``` + +```{r} +#| label: fig-yeast-original +#| fig-cap: Missingness in yeast data set, no ordering. +visdat::vis_miss(as.data.frame(yeast_order$original)) +``` + +```{r} +#| label: fig-yeast-reordered +#| fig-cap: Missingness in yeast data set, after ordering features by rank. +visdat::vis_miss(as.data.frame(yeast_order$ordered)) +``` + +```{r} +#| label: fig-yeast-nna +#| fig-cap: Feature rank as a function of the number of missing values. 
+yeast_order$n_na_rank |> + ggplot(aes(x = n_na, y = median_rank)) + + geom_point() +```