diff --git a/adam-r/bdgenomics.adam/DESCRIPTION b/adam-r/bdgenomics.adam/DESCRIPTION index 18fb21c974..1ffc7d7e44 100644 --- a/adam-r/bdgenomics.adam/DESCRIPTION +++ b/adam-r/bdgenomics.adam/DESCRIPTION @@ -5,17 +5,17 @@ Title: R Frontend for Big Data Genomics/ADAM Description: ADAM is a genomics analysis platform with specialized file formats built using Apache Avro, Apache Spark and Parquet. Author: Big Data Genomics Maintainer: Frank Austin Nothaft -Authors@R: c(person("Frank", "Nothaft", role = c("aut", "cre"), +Authors@R: c(person("Frank Austin", "Nothaft", role = c("aut", "cre"), email = "fnothaft@alumni.stanford.edu"), person(family = "Big Data Genomics", role = c("aut", "cph"))) License: Apache License (== 2.0) URL: http://www.bdgenomics.org https://github.com/bigdatagenomics/adam BugReports: https://github.com/bigdatagenomics/adam/issues Imports: - methods -Depends: - R (>= 3.0), + methods, SparkR (>= 2.1.0) +Depends: + R (>= 3.0) Suggests: testthat Collate: diff --git a/adam-r/bdgenomics.adam/NAMESPACE b/adam-r/bdgenomics.adam/NAMESPACE index c496fce307..dc24dd4375 100644 --- a/adam-r/bdgenomics.adam/NAMESPACE +++ b/adam-r/bdgenomics.adam/NAMESPACE @@ -1,6 +1,40 @@ # Generated by roxygen2: do not edit by hand export(ADAMContext) +export(aggregatedCoverage) +export(collapse) +export(countKmers) +export(coverage) +export(createADAMContext) +export(flankAdjacentFragments) +export(flatten) +export(loadAlignments) +export(loadContigFragments) +export(loadCoverage) +export(loadFeatures) +export(loadFragments) +export(loadGenotypes) +export(loadVariants) +export(markDuplicates) +export(pipe) +export(realignIndels) +export(recalibrateBaseQualities) +export(save) +export(saveAsParquet) +export(saveAsSam) +export(saveAsVcf) +export(sort) +export(sortLexicographically) +export(sortReadsByReferencePosition) +export(sortReadsByReferencePositionAndIndex) +export(toCoverage) +export(toDF) +export(toFeatures) +export(toFragments) +export(toReads) +export(toVariantContexts) +export(transform) +export(transmute) exportClasses(ADAMContext) exportClasses(AlignmentRecordRDD) exportClasses(CoverageRDD) @@ -45,3 +79,8 @@ exportMethods(toReads) exportMethods(toVariantContexts) exportMethods(transform) exportMethods(transmute) +importFrom(SparkR,sparkR.callJMethod) +importFrom(SparkR,sparkR.callJStatic) +importFrom(SparkR,sparkR.newJObject) +importFrom(SparkR,sparkR.session) +importFrom(methods,new) diff --git a/adam-r/bdgenomics.adam/R/adam-context.R b/adam-r/bdgenomics.adam/R/adam-context.R index 837ee8bbc4..4cf59ef93f 100644 --- a/adam-r/bdgenomics.adam/R/adam-context.R +++ b/adam-r/bdgenomics.adam/R/adam-context.R @@ -23,10 +23,32 @@ setOldClass("jobj") #' @description The ADAMContext provides helper methods for loading in genomic #' data into a Spark RDD/Dataframe. #' @slot jac Java object reference to the backing JavaADAMContext. +#' +#' @rdname ADAMContext +#' #' @export setClass("ADAMContext", slots = list(jac = "jobj")) +#' Creates an ADAMContext by creating a SparkSession. +#' +#' @return Returns an ADAMContext. +#' +#' @importFrom SparkR sparkR.session +#' +#' @export +createADAMContext <- function() { + ADAMContext(sparkR.session()) +} + +#' Creates an ADAMContext from an existing SparkSession. +#' +#' @param ss The Spark Session to use to create the ADAMContext. +#' @return Returns an ADAMContext. 
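+#'
+#' @examples
+#' \dontrun{
+#' ## hypothetical sketch: wrap an existing SparkR session
+#' ss <- sparkR.session()
+#' ac <- ADAMContext(ss)
+#' }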
+#' +#' @importFrom SparkR sparkR.callJMethod sparkR.newJObject +#' @importFrom methods new +#' #' @export ADAMContext <- function(ss) { ssc = sparkR.callJMethod(ss, "sparkContext") @@ -36,6 +58,7 @@ ADAMContext <- function(ss) { new("ADAMContext", jac = jac) } +#' @importFrom SparkR sparkR.callJStatic javaStringency <- function(stringency) { stringency <- sparkR.callJStatic("htsjdk.samtools.ValidationStringency", "valueOf", @@ -62,6 +85,8 @@ javaStringency <- function(stringency) { #' @param stringency The validation stringency to apply. Defaults to STRICT. #' @return Returns an RDD containing reads. #' +#' @importFrom SparkR sparkR.callJMethod +#' #' @export setMethod("loadAlignments", signature(ac = "ADAMContext", filePath = "character"), @@ -85,6 +110,8 @@ setMethod("loadAlignments", #' @param filePath The path to load the file from. #' @return Returns an RDD containing sequence fragments. #' +#' @importFrom SparkR sparkR.callJMethod +#' #' @export setMethod("loadContigFragments", signature(ac = "ADAMContext", filePath = "character"), @@ -109,6 +136,8 @@ setMethod("loadContigFragments", #' @param stringency The validation stringency to apply. Defaults to STRICT. #' @return Returns an RDD containing sequence fragments. #' +#' @importFrom SparkR sparkR.callJMethod +#' #' @export setMethod("loadFragments", signature(ac = "ADAMContext", filePath = "character"), @@ -141,6 +170,8 @@ setMethod("loadFragments", #' @param stringency The validation stringency to apply. Defaults to STRICT. #' @return Returns an RDD containing features. #' +#' @importFrom SparkR sparkR.callJMethod +#' #' @export setMethod("loadFeatures", signature(ac = "ADAMContext", filePath = "character"), @@ -174,6 +205,8 @@ setMethod("loadFeatures", #' @param stringency The validation stringency to apply. Defaults to STRICT. #' @return Returns an RDD containing coverage. #' +#' @importFrom SparkR sparkR.callJMethod +#' #' @export setMethod("loadCoverage", signature(ac = "ADAMContext", filePath = "character"), @@ -196,6 +229,8 @@ setMethod("loadCoverage", #' @param stringency The validation stringency to apply. Defaults to STRICT. #' @return Returns an RDD containing genotypes. #' +#' @importFrom SparkR sparkR.callJMethod +#' #' @export setMethod("loadGenotypes", signature(ac = "ADAMContext", filePath = "character"), @@ -218,6 +253,8 @@ setMethod("loadGenotypes", #' @param stringency The validation stringency to apply. Defaults to STRICT. #' @return Returns an RDD containing variants. #' +#' @importFrom SparkR sparkR.callJMethod +#' #' @export setMethod("loadVariants", signature(ac = "ADAMContext", filePath = "character"), diff --git a/adam-r/bdgenomics.adam/R/generics.R b/adam-r/bdgenomics.adam/R/generics.R index b1f8b8ed0d..6a005567da 100644 --- a/adam-r/bdgenomics.adam/R/generics.R +++ b/adam-r/bdgenomics.adam/R/generics.R @@ -18,50 +18,71 @@ #### ADAM Context operations #### -# @rdname ADAMContext -# @export +#' The ADAMContext provides functions on top of a SparkContext for loading genomic data. +#' +#' @name ADAMContext +NULL + +#' @rdname ADAMContext +#' @param ac The ADAMContext. +#' @param filePath The path to load the file from. +#' @param ... additional argument(s). +#' @export setGeneric("loadAlignments", function(ac, filePath, ...) 
{ standardGeneric("loadAlignments") }) -# @rdname ADAMContext -# @export +#' @rdname ADAMContext +#' @export setGeneric("loadContigFragments", function(ac, filePath) { standardGeneric("loadContigFragments") }) -# @rdname ADAMContext -# @export +#' @rdname ADAMContext +#' @export setGeneric("loadFragments", function(ac, filePath, ...) { standardGeneric("loadFragments") }) -# @rdname ADAMContext -# @export +#' @rdname ADAMContext +#' @export setGeneric("loadFeatures", function(ac, filePath, ...) { standardGeneric("loadFeatures") }) -# @rdname ADAMContext -# @export +#' @rdname ADAMContext +#' @export setGeneric("loadCoverage", function(ac, filePath, ...) { standardGeneric("loadCoverage") }) -# @rdname ADAMContext -# @export +#' @rdname ADAMContext +#' @export setGeneric("loadGenotypes", function(ac, filePath, ...) { standardGeneric("loadGenotypes") }) -# @rdname ADAMContext -# @export +#' @rdname ADAMContext +#' @export setGeneric("loadVariants", function(ac, filePath, ...) { standardGeneric("loadVariants") }) #### RDD operations #### -# @rdname GenomicRDD -# @export +#' The GenomicRDD is the base class that all genomic datatypes extend from in ADAM. +#' +#' @name GenomicRDD +NULL + +#' @rdname GenomicRDD +#' @param cmd The command to run. +#' @param tFormatter The name of the ADAM in-formatter class to use. +#' @param xFormatter The name of the ADAM out-formatter class to use. +#' @param convFn The name of the ADAM GenomicRDD conversion class to +#' use. +#' @param ... additional argument(s). +#' @return Returns a new RDD where the input from the original RDD has +#' been piped through a command that runs locally on each executor. +#' @export setGeneric("pipe", function(ardd, cmd, tFormatter, xFormatter, convFn, ...) { standardGeneric("pipe") }) -# @rdname GenomicRDD -# @export +#' @rdname GenomicRDD +#' @export setGeneric("toDF", function(ardd) { standardGeneric("toDF") }) @@ -71,8 +92,8 @@ setGeneric("replaceRdd", setGeneric("wrapTransformation", function(ardd, tFn) { standardGeneric("wrapTransformation") }) -# @rdname GenomicRDD -# @export +#' @rdname GenomicRDD +#' @export setGeneric("transform", function(ardd, tFn) { standardGeneric("transform") }) @@ -82,130 +103,167 @@ setGeneric("inferConversionFn", setGeneric("destClassSuffix", function(destClass) { standardGeneric("destClassSuffix") }) -# @rdname GenomicRDD -# @export +#' @rdname GenomicRDD +#' @param tFn A function that transforms the underlying RDD as a DataFrame. +#' @param destClass The destination class of this transmutation. +#' @export setGeneric("transmute", function(ardd, tFn, destClass, ...) { standardGeneric("transmute") }) -# @rdname GenomicRDD -# @export +#' @rdname GenomicRDD +#' @export setGeneric("save", function(ardd, filePath, ...) { standardGeneric("save") }) -# @rdname GenomicRDD -# @export +#' @rdname GenomicRDD +#' @export setGeneric("sort", function(ardd) { standardGeneric("sort") }) -# @rdname GenomicRDD -# @export +#' @rdname GenomicRDD +#' @export setGeneric("sortLexicographically", function(ardd) { standardGeneric("sortLexicographically") }) +#' Saves this RDD to disk as Parquet. +#' +#' @param ardd The RDD to apply this to. +#' @param filePath Path to save file to. +#' +#' @rdname GenomicRDD +#' +#' @export +setGeneric("saveAsParquet", + function(ardd, filePath) { standardGeneric("saveAsParquet") }) + #### AlignmentRecord operations #### -# @rdname AlignmentRecordRDD -# @export +#' The AlignmentRecordRDD is the class used to manipulate genomic read data. 
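+#'
+#' @examples
+#' \dontrun{
+#' ## hypothetical sketch: load reads, deduplicate, save as a single BAM
+#' ac <- createADAMContext()
+#' reads <- loadAlignments(ac, "reads.sam")
+#' saveAsSam(markDuplicates(reads), "reads.dedup.bam", asSingleFile = TRUE)
+#' }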
+#' +#' @name AlignmentRecordRDD +NULL + +#' @rdname AlignmentRecordRDD +#' @export setGeneric("toFragments", function(ardd) { standardGeneric("toFragments") }) -# @rdname AlignmentRecordRDD -# @export +#' @rdname AlignmentRecordRDD +#' @param ardd The RDD to apply this to. +#' @param ... additional argument(s). +#' @export setGeneric("toCoverage", function(ardd, ...) { standardGeneric("toCoverage") }) -# @rdname AlignmentRecordRDD -# @export +#' @rdname AlignmentRecordRDD +#' @param kmerLength The value of _k_ to use for cutting _k_-mers. +#' @export setGeneric("countKmers", function(ardd, kmerLength) { standardGeneric("countKmers") }) -# @rdname AlignmentRecordRDD -# @export +#' @rdname AlignmentRecordRDD +#' @param filePath The path to save the file to. +#' @export setGeneric("saveAsSam", function(ardd, filePath, ...) { standardGeneric("saveAsSam") }) -# @rdname AlignmentRecordRDD-transforms -# @export +#' @rdname AlignmentRecordRDD +#' @export setGeneric("sortReadsByReferencePosition", function(ardd) { standardGeneric("sortReadsByReferencePosition") }) -# @rdname AlignmentRecordRDD-transforms -# @export +#' @rdname AlignmentRecordRDD +#' @export setGeneric("sortReadsByReferencePositionAndIndex", function(ardd) { standardGeneric("sortReadsByReferencePositionAndIndex") }) -# @rdname AlignmentRecordRDD-transforms -# @export +#' @rdname AlignmentRecordRDD +#' @export setGeneric("markDuplicates", function(ardd) { standardGeneric("markDuplicates") }) -# @rdname AlignmentRecordRDD-transforms -# @export +#' @rdname AlignmentRecordRDD +#' @param knownSnps A table of known SNPs to mask valid variants. +#' @param validationStringency The stringency to apply towards validating BQSR. +#' @export setGeneric("recalibrateBaseQualities", function(ardd, knownSnps, validationStringency) { standardGeneric("recalibrateBaseQualities") }) -# @rdname AlignmentRecordRDD-transforms -# @export +#' @rdname AlignmentRecordRDD +#' @export setGeneric("realignIndels", function(ardd, ...) { standardGeneric("realignIndels") }) -# @rdname AlignmentRecordRDD-transforms -# @export -setGeneric("realignIndels", - function(ardd, knownIndels, ...) { standardGeneric("realignIndels") }) - #### Coverage operations #### -# @rdname CoverageRDD -# @export +#' The CoverageRDD class is used to manipulate read coverage counts. +#' +#' @name CoverageRDD +NULL + +#' @rdname CoverageRDD +#' @param ... additional argument(s). +#' @export setGeneric("collapse", function(ardd, ...) { standardGeneric("collapse") }) -# @rdname CoverageRDD -# @export +#' @rdname CoverageRDD +#' @export setGeneric("toFeatures", function(ardd) { standardGeneric("toFeatures") }) -# @rdname CoverageRDD -# @export +#' @rdname CoverageRDD +#' @export setGeneric("coverage", function(ardd, ...) { standardGeneric("coverage") }) -# @rdname CoverageRDD -# @export +#' @rdname CoverageRDD +#' @export +#' @aliases aggregatedCoverage,CoverageRDD-method setGeneric("aggregatedCoverage", function(ardd, ...) { standardGeneric("aggregatedCoverage") }) -# @rdname CoverageRDD -# @export +#' @rdname CoverageRDD +#' @export setGeneric("flatten", function(ardd) { standardGeneric("flatten") }) #### Fragment operations #### -# @rdname FragmentRDD -# @export +#' The FragmentRDD class is used to manipulate paired reads. +#' +#' @name FragmentRDD +NULL + +#' @rdname FragmentRDD +#' @param ardd The RDD to apply this to. 
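+#' @return Returns this RDD converted back to reads.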
+#' @export setGeneric("toReads", function(ardd) { standardGeneric("toReads") }) -#### Genotype operations #### +#### Genotype and Variant operations #### -# @rdname GenotypeRDD -# @export +#' Converts this RDD to VariantContexts. +#' +#' @param ardd The RDD to apply this to. +#' @return Returns this RDD of Variants as VariantContexts. +#' @export setGeneric("toVariantContexts", function(ardd) { standardGeneric("toVariantContexts") }) -# @rdname GenotypeRDD -# @export -setGeneric("saveAsParquet", - function(ardd, filePath) { standardGeneric("saveAsParquet") }) - #### NucleotideContigFragment operations #### -# @rdname NucleotideContigFragmentRDD -# @export +#' The NucleotideContigFragmentRDD class is used to manipulate contigs. +#' +#' @name NucleotideContigFragmentRDD +NULL + +#' @rdname NucleotideContigFragmentRDD +#' @param ardd The RDD to apply this to. +#' @param flankLength The length to extend adjacent records by. +#' @export setGeneric("flankAdjacentFragments", function(ardd, flankLength) { standardGeneric("flankAdjacentFragments") @@ -213,17 +271,20 @@ setGeneric("flankAdjacentFragments", #### Variant operations #### -# @rdname VariantRDD -# @export -setGeneric("toVariantContexts", - function(ardd) { standardGeneric("toVariantContexts") }) - -# @rdname VariantRDD -# @export -setGeneric("saveAsParquet", - function(ardd, filePath) { standardGeneric("saveAsParquet") }) - -# @rdname VariantContextRDD -# @export +#' The VariantContextRDD class is used to manipulate VCF-styled data. +#' +#' Each element in a VariantContext RDD corresponds to a VCF line. This +#' differs from the GenotypeRDD, where each element represents the genotype +#' of a single sample at a single site, or a VariantRDD, which represents +#' just the variant of interest. +#' +#' @name VariantContextRDD +NULL + +#' @rdname VariantContextRDD +#' @param ardd The RDD to apply this to. +#' @param filePath Path to save VCF to. +#' @param ... additional argument(s). +#' @export setGeneric("saveAsVcf", function(ardd, filePath, ...) { standardGeneric("saveAsVcf") }) diff --git a/adam-r/bdgenomics.adam/R/rdd.R b/adam-r/bdgenomics.adam/R/rdd.R index 56269bc4df..83b09af51e 100644 --- a/adam-r/bdgenomics.adam/R/rdd.R +++ b/adam-r/bdgenomics.adam/R/rdd.R @@ -19,44 +19,75 @@ library(SparkR) setOldClass("jobj") +#' A class that wraps an RDD of genomic data with helpful metadata. +#' +#' @rdname GenomicRDD +#' @slot jrdd The Java RDD that this class wraps. +#' #' @export setClass("GenomicRDD", slots = list(jrdd = "jobj")) - +#' A class that wraps a DataFrame of genomic data with helpful metadata. +#' +#' @rdname GenomicDataset +#' @slot jrdd The Java RDD that this class wraps. +#' #' @export setClass("GenomicDataset", slots = list(jrdd = "jobj"), contains = "GenomicRDD") - +#' A class that wraps an RDD of genomic reads with helpful metadata. +#' +#' @rdname AlignmentRecordRDD +#' @slot jrdd The Java RDD of AlignmentRecords that this class wraps. +#' #' @export setClass("AlignmentRecordRDD", slots = list(jrdd = "jobj"), contains = "GenomicDataset") +#' @importFrom methods new AlignmentRecordRDD <- function(jrdd) { new("AlignmentRecordRDD", jrdd = jrdd) } +#' A class that wraps an RDD of genomic coverage data with helpful metadata. +#' +#' @rdname CoverageRDD +#' @slot jrdd The Java RDD of Coverage that this class wraps. 
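+#'
+#' @examples
+#' \dontrun{
+#' ## hypothetical sketch: coverage from reads, merged at adjacent sites
+#' cov <- toCoverage(reads)
+#' merged <- collapse(cov)
+#' }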
+#' #' @export setClass("CoverageRDD", slots = list(jrdd = "jobj"), contains = "GenomicDataset") +#' @importFrom methods new CoverageRDD <- function(jrdd) { new("CoverageRDD", jrdd = jrdd) } +#' A class that wraps an RDD of genomic features with helpful metadata. +#' +#' @rdname FeatureRDD +#' @slot jrdd The Java RDD of Features that this class wraps. +#' #' @export setClass("FeatureRDD", slots = list(jrdd = "jobj"), contains = "GenomicDataset") +#' @importFrom methods new FeatureRDD <- function(jrdd) { new("FeatureRDD", jrdd = jrdd) } +#' A class that wraps an RDD of read pairs grouped by sequencing fragment with helpful metadata. +#' +#' @rdname FragmentRDD +#' @slot jrdd The Java RDD of Fragments that this class wraps. +#' #' @export setClass("FragmentRDD", slots = list(jrdd = "jobj"), @@ -66,38 +97,62 @@ FragmentRDD <- function(jrdd) { new("FragmentRDD", jrdd = jrdd) } +#' A class that wraps an RDD of genotypes with helpful metadata. +#' +#' @rdname GenotypeRDD +#' @slot jrdd The Java RDD of Genotypes that this class wraps. +#' #' @export setClass("GenotypeRDD", slots = list(jrdd = "jobj"), contains = "GenomicDataset") +#' @importFrom methods new GenotypeRDD <- function(jrdd) { new("GenotypeRDD", jrdd = jrdd) } +#' A class that wraps an RDD of contigs with helpful metadata. +#' +#' @rdname NucleotideContigFragmentRDD +#' @slot jrdd The Java RDD of contigs that this class wraps. +#' #' @export setClass("NucleotideContigFragmentRDD", slots = list(jrdd = "jobj"), contains = "GenomicDataset") +#' @importFrom methods new NucleotideContigFragmentRDD <- function(jrdd) { new("NucleotideContigFragmentRDD", jrdd = jrdd) } +#' A class that wraps an RDD of variants with helpful metadata. +#' +#' @rdname VariantRDD +#' @slot jrdd The Java RDD of Variants that this class wraps. +#' #' @export setClass("VariantRDD", slots = list(jrdd = "jobj"), contains = "GenomicDataset") +#' @importFrom methods new VariantRDD <- function(jrdd) { new("VariantRDD", jrdd = jrdd) } +#' A class that wraps an RDD of both variants and genotypes with helpful metadata. +#' +#' @rdname VariantContextRDD +#' @slot jrdd The Java RDD of VariantContexts that this class wraps. +#' #' @export setClass("VariantContextRDD", slots = list(jrdd = "jobj"), contains = "GenomicRDD") +#' @importFrom methods new VariantContextRDD <- function(jrdd) { new("VariantContextRDD", jrdd = jrdd) } @@ -116,6 +171,7 @@ VariantContextRDD <- function(jrdd) { #' format the input to the pipe, and the implicit OutFormatter is used to #' parse the output from the pipe. #' +#' @param ardd The RDD to apply this to. #' @param cmd The command to run. #' @param tFormatter The name of the ADAM in-formatter class to use. #' @param xFormatter The name of the ADAM out-formatter class to use. @@ -130,6 +186,8 @@ VariantContextRDD <- function(jrdd) { #' @return Returns a new RDD where the input from the original RDD has #' been piped through a command that runs locally on each executor. #' +#' @importFrom SparkR sparkR.callJStatic sparkR.callJMethod +#' #' @export setMethod("pipe", signature(ardd = "GenomicRDD", @@ -185,8 +243,11 @@ setMethod("pipe", #' Sorts our genome aligned data by reference positions, with contigs ordered #' by index. #' +#' @param ardd The RDD to apply this to. #' @return Returns a new, sorted RDD, of the implementing class type. 
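+#' @examples
+#' \dontrun{
+#' ## hypothetical sketch; 'reads' is any GenomicRDD, e.g. from loadAlignments
+#' sortedReads <- sort(reads)
+#' }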
#' +#' @importFrom SparkR sparkR.callJMethod +#' #' @export setMethod("sort", signature(ardd = "GenomicRDD"), @@ -197,8 +258,11 @@ setMethod("sort", #' Sorts our genome aligned data by reference positions, with contigs ordered #' lexicographically. #' +#' @param ardd The RDD to apply this to. #' @return Returns a new, sorted RDD, of the implementing class type. #' +#' @importFrom SparkR sparkR.callJMethod +#' #' @export setMethod("sortLexicographically", signature(ardd = "GenomicRDD"), @@ -211,6 +275,8 @@ setMethod("sortLexicographically", #' @param ardd The RDD to convert into a dataframe. #' @return Returns a dataframe representing this RDD. #' +#' @importFrom SparkR sparkR.callJMethod +#' #' @export setMethod("toDF", signature(ardd = "GenomicDataset"), @@ -219,6 +285,7 @@ setMethod("toDF", new("SparkDataFrame", sdf, FALSE) }) +#' @importFrom SparkR sparkR.callJStatic setMethod("wrapTransformation", signature(ardd = "GenomicRDD", tFn = "function"), @@ -235,10 +302,13 @@ setMethod("wrapTransformation", #' Applies a function that transforms the underlying DataFrame into a new DataFrame #' using the Spark SQL API. #' +#' @param ardd The RDD to apply this to. #' @param tFn A function that transforms the underlying RDD as a DataFrame. #' @return A new RDD where the RDD of genomic data has been replaced, but the #' metadata (sequence dictionary, and etc) is copied without modification. #' +#' @importFrom SparkR sparkR.callJMethod +#' #' @export setMethod("transform", signature(ardd = "GenomicRDD", @@ -283,12 +353,15 @@ setMethod("destClassSuffix", #' Applies a function that transmutes the underlying DataFrame into a new RDD of a #' different type. #' +#' @param ardd The RDD to apply this to. #' @param tFn A function that transforms the underlying RDD as a DataFrame. -#' @param convFn The name of the ADAM GenomicDatasetConversion class to use. #' @param destClass The destination class of this transmutation. +#' @param convFn The name of the ADAM GenomicDatasetConversion class to use. #' @return A new RDD where the RDD of genomic data has been replaced, but the #' metadata (sequence dictionary, and etc) is copied without modification. #' +#' @importFrom SparkR sparkR.callJMethod sparkR.callJStatic +#' #' @export setMethod("transmute", signature(ardd = "GenomicRDD", @@ -326,9 +399,12 @@ setMethod("inferConversionFn", #' Convert this set of reads into fragments. #' +#' @param ardd The RDD to apply this to. #' @return Returns a FragmentRDD where all reads have been grouped together by #' the original sequence fragment they come from. #' +#' @importFrom SparkR sparkR.callJMethod +#' #' @export setMethod("toFragments", signature(ardd = "AlignmentRecordRDD"), @@ -338,6 +414,7 @@ setMethod("toFragments", #' Saves this RDD to disk as a SAM/BAM/CRAM file. #' +#' @param ardd The RDD to apply this to. #' @param filePath The path to save the file to. #' @param asType The type of file to save. Valid choices are SAM, BAM, #' CRAM, and NA. If None, the file type is inferred from the extension. @@ -345,6 +422,8 @@ setMethod("toFragments", #' @param asSingleFile Whether to save the file as a single merged #' file or as shards. #' +#' @importFrom SparkR sparkR.callJMethod sparkR.callJStatic +#' #' @export setMethod("saveAsSam", signature(ardd = "AlignmentRecordRDD", filePath = "character"), @@ -374,10 +453,13 @@ setMethod("saveAsSam", #' Converts this set of reads into a corresponding CoverageRDD. #' +#' @param ardd The RDD to apply this to. 
#' @param collapse Determines whether to merge adjacent coverage elements with #' the same score to a single coverage observation. #' @return Returns an RDD with observed coverage. #' +#' @importFrom SparkR sparkR.callJMethod +#' #' @export setMethod("toCoverage", signature(ardd = "AlignmentRecordRDD"), @@ -387,9 +469,12 @@ setMethod("toCoverage", #' Saves this RDD to disk, with the type identified by the extension. #' +#' @param ardd The RDD to apply this to. #' @param filePath The path to save the file to. #' @param isSorted Whether the file is sorted or not. #' +#' @importFrom SparkR sparkR.callJMethod +#' #' @export setMethod("save", signature(ardd = "AlignmentRecordRDD", filePath = "character"), @@ -399,9 +484,12 @@ setMethod("save", #' Cuts reads into _k_-mers, and then counts the occurrences of each _k_-mer. #' +#' @param ardd The RDD to apply this to. #' @param kmerLength The value of _k_ to use for cutting _k_-mers. #' @return Returns a DataFrame containing k-mer/count pairs. #' +#' @importFrom SparkR sparkR.callJMethod +#' #' @export setMethod("countKmers", signature(ardd = "AlignmentRecordRDD", kmerLength = "numeric"), @@ -421,8 +509,11 @@ setMethod("countKmers", #' put at the end and sorted by read name. Contigs are ordered lexicographically #' by name. #' +#' @param ardd The RDD to apply this to. #' @return A new, sorted AlignmentRecordRDD. #' +#' @importFrom SparkR sparkR.callJMethod +#' #' @export setMethod("sortReadsByReferencePosition", signature(ardd = "AlignmentRecordRDD"), @@ -436,8 +527,11 @@ setMethod("sortReadsByReferencePosition", #' put at the end and sorted by read name. Contigs are ordered by index that #' they are ordered in the sequence metadata. #' +#' @param ardd The RDD to apply this to. #' @return A new, sorted AlignmentRecordRDD. #' +#' @importFrom SparkR sparkR.callJMethod +#' #' @export setMethod("sortReadsByReferencePositionAndIndex", signature(ardd = "AlignmentRecordRDD"), @@ -447,9 +541,12 @@ setMethod("sortReadsByReferencePositionAndIndex", #' Marks reads as possible fragment duplicates. #' +#' @param ardd The RDD to apply this to. #' @return A new RDD where reads have the duplicate read flag set. Duplicate #' reads are NOT filtered out. #' +#' @importFrom SparkR sparkR.callJMethod +#' #' @export setMethod("markDuplicates", signature(ardd = "AlignmentRecordRDD"), @@ -461,9 +558,13 @@ setMethod("markDuplicates", #' #' Uses a table of known SNPs to mask true variation during the recalibration #' process. +#' +#' @param ardd The RDD to apply this to. #' @param knownSnps A table of known SNPs to mask valid variants. #' @param validationStringency The stringency to apply towards validating BQSR. #' +#' @importFrom SparkR sparkR.callJMethod sparkR.callJStatic +#' #' @export setMethod("recalibrateBaseQualities", signature(ardd = "AlignmentRecordRDD", knownSnps = "VariantRDD", validationStringency = "character"), @@ -472,91 +573,66 @@ setMethod("recalibrateBaseQualities", AlignmentRecordRDD(sparkR.callJMethod(ardd@jrdd, "recalibrateBaseQualities", knownSnps@jrdd, stringency)) }) -#' Realigns indels using a concensus-based heuristic. -#' -#' Generates consensuses from reads. +#' Realigns indels using a consensus-based heuristic. #' +#' If no known indels are provided, generates consensuses from reads. Else, +#' generates consensuses from previously seen variants. +#' +#' @param ardd The RDD to apply this to. #' @param isSorted If the input data is sorted, setting this parameter to true #' avoids a second sort. 
-#' @param int maxIndelSize The size of the largest indel to use for realignment.
+#' @param maxIndelSize The size of the largest indel to use for realignment.
#' @param maxConsensusNumber The maximum number of consensus sequences to
#' realign against per target region.
#' @param lodThreshold Log-odds threshold to use when realigning; realignments
#' are only finalized if the log-odds threshold is exceeded.
#' @param maxTargetSize The maximum width of a single target region for
#' realignment.
+#' @param knownIndels An RDD of previously called INDEL variants.
#' @return Returns an RDD of mapped reads which have been realigned.
#'
+#' @importFrom SparkR sparkR.callJMethod sparkR.callJStatic
+#'
#' @export
setMethod("realignIndels",
          signature(ardd = "AlignmentRecordRDD"),
          function(ardd, isSorted = FALSE, maxIndelSize = 500,
                   maxConsensusNumber = 30, lodThreshold = 5.0,
-                   maxTargetSize = 3000) {
-              consensusModel <- sparkR.callJStatic("org.bdgenomics.adam.algorithms.consensus.ConsensusGenerator",
-                                                   "fromReads")
-              AlignmentRecordRDD(sparkR.callJMethod(ardd@jrdd, "realignIndels",
-                                                    consensusModel,
-                                                    isSorted,
-                                                    maxIndelSize,
-                                                    maxConsensusNumber,
-                                                    lodThreshold,
-                                                    maxTargetSize))
-          })
+                   maxTargetSize = 3000,
+                   knownIndels = NA) {
+
+              if (!is.na(knownIndels)) {
+                  consensusModel <- sparkR.callJStatic("org.bdgenomics.adam.algorithms.consensus.ConsensusGenerator",
+                                                       "fromKnowns", knownIndels@jrdd)
+                  AlignmentRecordRDD(sparkR.callJMethod(ardd@jrdd, "realignIndels",
+                                                        consensusModel,
+                                                        isSorted,
+                                                        maxIndelSize,
+                                                        maxConsensusNumber,
+                                                        lodThreshold,
+                                                        maxTargetSize))
-#' Realigns indels using a concensus-based heuristic.
-#'
-#' Generates consensuses from previously seen variants.
-#'
-#' @param knownIndels An RDD of previously called INDEL variants.
-#' @param isSorted If the input data is sorted, setting this parameter to true
-#' avoids a second sort.
-#' @param int maxIndelSize The size of the largest indel to use for realignment.
-#' @param maxConsensusNumber The maximum number of consensus sequences to
-#' realign against per target region.
-#' @param lodThreshold Log-odds threshold to use when realigning; realignments
-#' are only finalized if the log-odds threshold is exceeded.
-#' @param maxTargetSize The maximum width of a single target region for
-#' realignment.
-#' @return Returns an RDD of mapped reads which have been realigned.
-#' -#' @export -setMethod("realignIndels", - signature(ardd = "AlignmentRecordRDD", knownIndels = "VariantRDD"), - function(ardd, knownIndels, isSorted = FALSE, maxIndelSize = 500, - maxConsensusNumber = 30, lodThreshold = 5.0, - maxTargetSize = 3000) { - consensusModel <- sparkR.callJStatic("org.bdgenomics.adam.algorithms.consensus.ConsensusGenerator", - "fromKnowns", knownIndels@jrdd) - AlignmentRecordRDD(sparkR.callJMethod(ardd@jrdd, "realignIndels", - consensusModel, - isSorted, - maxIndelSize, - maxConsensusNumber, - lodThreshold, - maxTargetSize)) - }) - -setMethod("replaceRdd", - signature(ardd = "CoverageRDD", - rdd = "jobj"), - function(ardd, rdd) { - CoverageRDD(rdd) - }) - -setMethod("inferConversionFn", - signature(ardd = "CoverageRDD", - destClass = "character"), - function(ardd, destClass) { - paste0("org.bdgenomics.adam.api.java.CoverageTo", - destClassSuffix(destClass)) + } else { + consensusModel <- sparkR.callJStatic("org.bdgenomics.adam.algorithms.consensus.ConsensusGenerator", + "fromReads") + AlignmentRecordRDD(sparkR.callJMethod(ardd@jrdd, "realignIndels", + consensusModel, + isSorted, + maxIndelSize, + maxConsensusNumber, + lodThreshold, + maxTargetSize)) + } }) #' Saves coverage as a feature file. #' +#' @param ardd The RDD to apply this to. #' @param filePath The location to write the output. #' @param asSingleFile If true, merges the sharded output into a single file. #' +#' @importFrom SparkR sparkR.callJMethod +#' #' @export setMethod("save", signature(ardd = "CoverageRDD", filePath = "character"), @@ -571,8 +647,11 @@ setMethod("save", #' 3.0) and Coverage("chr1", 10, 20, 3.0) would be merged into one record #' Coverage("chr1", 1, 20, 3.0). #' +#' @param ardd The RDD to apply this to. #' @return An RDD with merged tuples of adjacent sites with same coverage. #' +#' @importFrom SparkR sparkR.callJMethod +#' #' @export setMethod("collapse", signature(ardd = "CoverageRDD"), function(ardd) { @@ -581,8 +660,11 @@ setMethod("collapse", signature(ardd = "CoverageRDD"), #' Converts CoverageRDD to FeatureRDD. #' +#' @param ardd The RDD to apply this to. #' @return Returns a FeatureRDD from a CoverageRDD. #' +#' @importFrom SparkR sparkR.callJMethod +#' #' @export setMethod("toFeatures", signature(ardd = "CoverageRDD"), function(ardd) { @@ -595,9 +677,12 @@ setMethod("toFeatures", signature(ardd = "CoverageRDD"), #' bin together ReferenceRegions of equal size. The coverage of each bin is the #' coverage of the first base pair in that bin. #' +#' @param ardd The RDD to apply this to. #' @param bpPerBin Number of bases to combine to one bin. #' @return Returns a sparsified CoverageRDD. #' +#' @importFrom SparkR sparkR.callJMethod +#' #' @export setMethod("coverage", signature(ardd = "CoverageRDD"), function(ardd, bpPerBin = 1) { @@ -610,9 +695,14 @@ setMethod("coverage", signature(ardd = "CoverageRDD"), #' bin together ReferenceRegions of equal size. The coverage of each bin is the #' average coverage of the bases in that bin. #' +#' @param ardd The RDD to apply this to. #' @param bpPerBin Number of bases to combine to one bin. #' @return Returns a sparsified CoverageRDD. #' +#' @rdname CoverageRDD +#' +#' @importFrom SparkR sparkR.callJMethod +#' #' @export setMethod("aggregatedCoverage", signature(ardd = "CoverageRDD"), function(ardd, bpPerBin = 1) { @@ -623,8 +713,11 @@ setMethod("aggregatedCoverage", signature(ardd = "CoverageRDD"), #' #' The opposite operation of collapse. #' +#' @param ardd The RDD to apply this to. #' @return New CoverageRDD of flattened coverage. 
#' +#' @importFrom SparkR sparkR.callJMethod +#' #' @export setMethod("flatten", signature(ardd = "CoverageRDD"), function(ardd) { @@ -653,11 +746,14 @@ setMethod("replaceRdd", #' these match, we fall back to Parquet. These files are written as sharded text #' files, which can be merged by passing asSingleFile = True. #' +#' @param ardd The RDD to apply this to. #' @param filePath The location to write the output. #' @param asSingleFile If true, merges the sharded output into a single file. #' @param disableFastConcat If asSingleFile is true, disables the use of the #' fast concatenation engine for saving to HDFS. #' +#' @importFrom SparkR sparkR.callJMethod +#' #' @export setMethod("save", signature(ardd = "FeatureRDD", filePath = "character"), @@ -669,8 +765,11 @@ setMethod("save", #' Converts the FeatureRDD to a CoverageRDD. #' +#' @param ardd The RDD to apply this to. #' @return Returns a new CoverageRDD. #' +#' @importFrom SparkR sparkR.callJMethod +#' #' @export setMethod("toCoverage", signature(ardd = "FeatureRDD"), function(ardd) { @@ -694,8 +793,11 @@ setMethod("replaceRdd", #' Splits up the reads in a Fragment, and creates a new RDD. #' +#' @param ardd The RDD to apply this to. #' @return Returns this RDD converted back to reads. #' +#' @importFrom SparkR sparkR.callJMethod +#' #' @export setMethod("toReads", signature(ardd = "FragmentRDD"), function(ardd) { @@ -704,9 +806,12 @@ setMethod("toReads", signature(ardd = "FragmentRDD"), #' Marks reads as possible fragment duplicates. #' +#' @param ardd The RDD to apply this to. #' @return A new RDD where reads have the duplicate read flag set. Duplicate #' reads are NOT filtered out. #' +#' @importFrom SparkR sparkR.callJMethod +#' #' @export setMethod("markDuplicates", signature(ardd = "FragmentRDD"), function(ardd) { @@ -715,8 +820,11 @@ setMethod("markDuplicates", signature(ardd = "FragmentRDD"), #' Saves fragments to Parquet. #' +#' @param ardd The RDD to apply this to. #' @param filePath Path to save fragments to. #' +#' @importFrom SparkR sparkR.callJMethod +#' #' @export setMethod("save", signature(ardd = "FragmentRDD", filePath = "character"), function(ardd, filePath) { @@ -740,8 +848,11 @@ setMethod("replaceRdd", #' Saves this RDD of genotypes to disk as Parquet. #' +#' @param ardd The RDD to apply this to. #' @param filePath Path to save file to. #' +#' @importFrom SparkR sparkR.callJMethod +#' #' @export setMethod("saveAsParquet", signature(ardd = "GenotypeRDD", filePath = "character"), function(ardd, filePath) { @@ -750,8 +861,11 @@ setMethod("saveAsParquet", signature(ardd = "GenotypeRDD", filePath = "character #' Converts this RDD of Genotypes to VariantContexts. #' +#' @param ardd The RDD to apply this to. #' @return Returns this RDD of Genotypes as VariantContexts. #' +#' @importFrom SparkR sparkR.callJMethod +#' #' @export setMethod("toVariantContexts", signature(ardd = "GenotypeRDD"), function(ardd) { @@ -778,8 +892,11 @@ setMethod("replaceRdd", #' If filename ends in .fa or .fasta, saves as Fasta. If not, saves fragments to #' Parquet. Defaults to 60 character line length, if saving as FASTA. #' +#' @param ardd The RDD to apply this to. #' @param filePath Path to save to. 
#' +#' @importFrom SparkR sparkR.callJMethod +#' #' @export setMethod("save", signature(ardd = "NucleotideContigFragmentRDD", filePath = "character"), function(ardd, filePath) { @@ -789,10 +906,13 @@ setMethod("save", signature(ardd = "NucleotideContigFragmentRDD", filePath = "ch #' For all adjacent records in the RDD, we extend the records so that the #' adjacent records now overlap by _n_ bases, where _n_ is the flank length. #' +#' @param ardd The RDD to apply this to. #' @param flankLength The length to extend adjacent records by. #' @return Returns the RDD, with all adjacent fragments extended with flanking #' sequence. #' +#' @importFrom SparkR sparkR.callJMethod +#' #' @export setMethod("flankAdjacentFragments", signature(ardd = "NucleotideContigFragmentRDD", flankLength = "numeric"), @@ -819,8 +939,11 @@ setMethod("replaceRdd", #' Saves this RDD of variants to disk as Parquet. #' +#' @param ardd The RDD to apply this to. #' @param filePath Path to save file to. #' +#' @importFrom SparkR sparkR.callJMethod +#' #' @export setMethod("saveAsParquet", signature(ardd = "VariantRDD", filePath = "character"), function(ardd, filePath) { @@ -829,8 +952,11 @@ setMethod("saveAsParquet", signature(ardd = "VariantRDD", filePath = "character" #' Converts this RDD of Variants to VariantContexts. #' +#' @param ardd The RDD to apply this to. #' @return Returns this RDD of Variants as VariantContexts. #' +#' @importFrom SparkR sparkR.callJMethod +#' #' @export setMethod("toVariantContexts", signature(ardd = "VariantRDD"), function(ardd) { @@ -846,15 +972,18 @@ setMethod("replaceRdd", #' Saves this RDD of variant contexts to disk as VCF #' +#' @param ardd The RDD to apply this to. #' @param filePath Path to save VCF to. #' @param asSingleFile If true, saves the output as a single file #' by merging the sharded output after saving. #' @param deferMerging If true, saves the output as prepped for merging #' into a single file, but does not merge. #' @param stringency The stringency to use when writing the VCF. -#' @param disableFastConcat: If asSingleFile is true, disables the use +#' @param disableFastConcat If asSingleFile is true, disables the use #' of the fast concatenation engine for saving to HDFS. #' +#' @importFrom SparkR sparkR.callJMethod sparkR.callJStatic +#' #' @export setMethod("saveAsVcf", signature(ardd = "VariantContextRDD", filePath = "character"), function(ardd, diff --git a/adam-r/bdgenomics.adam/tests/testthat/test_adamContext.R b/adam-r/bdgenomics.adam/tests/testthat/test_adamContext.R index f5cdcc6aa8..8582117c02 100644 --- a/adam-r/bdgenomics.adam/tests/testthat/test_adamContext.R +++ b/adam-r/bdgenomics.adam/tests/testthat/test_adamContext.R @@ -16,11 +16,11 @@ # limitations under the License. 
#
library(bdgenomics.adam)
+library(SparkR)

context("basic ADAM context functions")

-sc <- sparkR.session()
-ac <- ADAMContext(sc)
+ac <- createADAMContext()

test_that("load reads", {
    reads <- loadAlignments(ac, resourceFile("small.sam"))
diff --git a/adam-r/bdgenomics.adam/tests/testthat/test_alignmentRecordRdd.R b/adam-r/bdgenomics.adam/tests/testthat/test_alignmentRecordRdd.R
index ec1eb70410..ae91240244 100644
--- a/adam-r/bdgenomics.adam/tests/testthat/test_alignmentRecordRdd.R
+++ b/adam-r/bdgenomics.adam/tests/testthat/test_alignmentRecordRdd.R
@@ -19,8 +19,7 @@ library(bdgenomics.adam)

context("manipulating alignmentrecords")

-sc <- sparkR.session()
-ac <- ADAMContext(sc)
+ac <- createADAMContext()

test_that("save sorted sam", {

diff --git a/adam-r/bdgenomics.adam/tests/testthat/test_featureRdd.R b/adam-r/bdgenomics.adam/tests/testthat/test_featureRdd.R
index 27b789a7a7..ba10e6103d 100644
--- a/adam-r/bdgenomics.adam/tests/testthat/test_featureRdd.R
+++ b/adam-r/bdgenomics.adam/tests/testthat/test_featureRdd.R
@@ -19,8 +19,7 @@ library(bdgenomics.adam)

context("manipulating features")

-sc <- sparkR.session()
-ac <- ADAMContext(sc)
+ac <- createADAMContext()

test_that("round trip gtf", {
    testFile <- resourceFile("Homo_sapiens.GRCh37.75.trun20.gtf")
diff --git a/adam-r/bdgenomics.adam/tests/testthat/test_genotypeRdd.R b/adam-r/bdgenomics.adam/tests/testthat/test_genotypeRdd.R
index 04bf83e186..abb29326d8 100644
--- a/adam-r/bdgenomics.adam/tests/testthat/test_genotypeRdd.R
+++ b/adam-r/bdgenomics.adam/tests/testthat/test_genotypeRdd.R
@@ -19,8 +19,7 @@ library(bdgenomics.adam)

context("manipulating genotypes")

-sc <- sparkR.session()
-ac <- ADAMContext(sc)
+ac <- createADAMContext()

test_that("round trip vcf", {
    testFile <- resourceFile("small.vcf")
diff --git a/adam-r/bdgenomics.adam/tests/testthat/test_variantRdd.R b/adam-r/bdgenomics.adam/tests/testthat/test_variantRdd.R
index 49893b07dd..e9fb59d6f7 100644
--- a/adam-r/bdgenomics.adam/tests/testthat/test_variantRdd.R
+++ b/adam-r/bdgenomics.adam/tests/testthat/test_variantRdd.R
@@ -19,8 +19,7 @@ library(bdgenomics.adam)

context("manipulating variants")

-sc <- sparkR.session()
-ac <- ADAMContext(sc)
+ac <- createADAMContext()

test_that("round trip vcf", {
    testFile <- resourceFile("small.vcf")
diff --git a/scripts/release/release.sh b/scripts/release/release.sh
index bb5443bc69..b07b4a4124 100755
--- a/scripts/release/release.sh
+++ b/scripts/release/release.sh
@@ -129,6 +129,22 @@ rm -rf release-venv

popd

+# build R tarball
+#
+# !!!!!
+# NOTE:
+# !!!!!
+#
+# We will not be pushing to CRAN until SparkR is reinstated in CRAN. Until then,
+# this tarball will need to be manually attached to the GitHub releases page.
+pushd adam-r
+R CMD build bdgenomics.adam
+if [ $? != 0 ]; then
+    echo "Building the bdgenomics.adam R tarball failed."
+    exit 1
+fi
+popd
+
if [ $? != 0 ]; then
    echo "Releasing bdgenomics.adam to PyPi failed."
    exit 1
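As a usage reference, the following is a minimal end-to-end sketch of the R API this patch exposes. It assumes a local Spark installation; the file names are illustrative, not part of the patch.

library(bdgenomics.adam)
library(SparkR)

# create an ADAMContext backed by a fresh SparkR session
ac <- createADAMContext()

# load reads, sort by reference position, and mark duplicate fragments
reads <- loadAlignments(ac, "sample.sam")
sorted <- sortReadsByReferencePosition(reads)
dedup <- markDuplicates(sorted)

# inspect the result as a Spark SQL DataFrame
head(toDF(dedup))

# write back out as a single merged BAM file
saveAsSam(dedup, "sample.processed.bam", asSingleFile = TRUE)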