diff --git a/adam-python/bdgenomics/adam/__init__.py b/adam-python/bdgenomics/adam/__init__.py
index bacfd5b848..e3348e31ec 100644
--- a/adam-python/bdgenomics/adam/__init__.py
+++ b/adam-python/bdgenomics/adam/__init__.py
@@ -15,3 +15,18 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+r"""
+=======================
+bdgenomics.adam Package
+=======================
+.. currentmodule:: bdgenomics.adam
+
+ADAM's Python API wraps the ADAMContext and GenomicRDD APIs so they can be used from PySpark.
+The Python API is feature complete relative to ADAM's Java API.
+
+.. automodule:: bdgenomics.adam.adamContext
+.. automodule:: bdgenomics.adam.models
+.. automodule:: bdgenomics.adam.rdd
+.. automodule:: bdgenomics.adam.stringency
+
+"""
diff --git a/adam-python/bdgenomics/adam/adamContext.py b/adam-python/bdgenomics/adam/adamContext.py
index a94d179e97..51c48fc655 100644
--- a/adam-python/bdgenomics/adam/adamContext.py
+++ b/adam-python/bdgenomics/adam/adamContext.py
@@ -15,6 +15,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+r"""
+===========
+adamContext
+===========
+.. currentmodule:: bdgenomics.adam.adamContext
+.. autosummary::
+   :toctree: _generate/
+
+   ADAMContext
+"""
 
 from bdgenomics.adam.rdd import AlignmentRecordRDD, \
     CoverageRDD, \
@@ -35,10 +45,9 @@ class ADAMContext(object):
 
     def __init__(self, ss):
         """
-        Initializes an ADAMContext using a SparkContext.
+        Initializes an ADAMContext using a SparkSession.
 
-        :param pyspark.context.SparkContext sc: The currently active
-        SparkContext.
+        :param pyspark.sql.SparkSession ss: The currently active SparkSession.
         """
 
         self._sc = ss.sparkContext
@@ -56,9 +65,9 @@ def loadAlignments(self, filePath, stringency=STRICT):
         * .fa/.fasta as FASTA format,
         * .fq/.fastq as FASTQ format, and
         * .ifq as interleaved FASTQ format.
-        
+
         If none of these match, fall back to Parquet + Avro.
-        
+
         For FASTA, FASTQ, and interleaved FASTQ formats, compressed files
         are supported through compression codecs configured in Hadoop, which by
         default include .gz and .bz2, but can include more.
@@ -100,13 +109,13 @@ def loadIndexedBam(self,
 
         # translate reference regions into jvm types
         javaRrs = [rr._toJava(self._jvm) for rr in viewRegions]
-        
+
         adamRdd = self.__jac.loadIndexedBam(filePath,
                                             javaRrs,
                                             _toJava(stringency, self._jvm))
 
         return AlignmentRecordRDD(adamRdd, self._sc)
-        
+
     def loadCoverage(self, filePath,
                      stringency=STRICT):
@@ -137,7 +146,7 @@ def loadCoverage(self, filePath,
                                           _toJava(stringency, self._jvm))
 
         return CoverageRDD(adamRdd, self._sc)
-        
+
     def loadContigFragments(self, filePath):
         """
diff --git a/adam-python/bdgenomics/adam/models.py b/adam-python/bdgenomics/adam/models.py
index 24ff428b8b..25860e5bb6 100644
--- a/adam-python/bdgenomics/adam/models.py
+++ b/adam-python/bdgenomics/adam/models.py
@@ -15,8 +15,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+r"""
+======
+models
+======
+.. currentmodule:: bdgenomics.adam.models
+.. autosummary::
+   :toctree: _generate/
+
+   ReferenceRegion
+"""
 
 class ReferenceRegion:
+    """
+    Represents a contiguous region of the reference genome.
+ """ def __init__(self, referenceName, start, end): """ diff --git a/adam-python/bdgenomics/adam/rdd.py b/adam-python/bdgenomics/adam/rdd.py index c9fa987303..e99473a903 100644 --- a/adam-python/bdgenomics/adam/rdd.py +++ b/adam-python/bdgenomics/adam/rdd.py @@ -15,6 +15,25 @@ # See the License for the specific language governing permissions and # limitations under the License. # +r""" +=== +rdd +=== +.. currentmodule:: bdgenomics.adam.rdd +.. autosummary:: + :toctree: _generate/ + + GenomicDataset + VCFSupportingGenomicDataset + AlignmentRecordRDD + CoverageRDD + FeatureRDD + FragmentRDD + GenotypeRDD + NucleotideContigFragmentRDD + VariantRDD + VariantContextRDD +""" import logging @@ -27,7 +46,9 @@ _log = logging.getLogger(__name__) class GenomicDataset(object): - + """ + Wraps an RDD of genomic data with helpful metadata. + """ def __init__(self, jvmRdd, sc): """ @@ -140,7 +161,7 @@ def union(self, rdds): :param list rdds: The RDDs to union into this RDD. :return: Returns a new RDD containing the union of this RDD and the other RDDs. """ - + return self._replaceRdd(self._jvmRdd.union(map(lambda x: x._jvmRdd, rdds))) @@ -156,7 +177,7 @@ def _wrapTransformation(self, jvm = self.sc._jvm return jvm.org.bdgenomics.adam.api.python.DataFrameConversionWrapper(newDf._jdf) - + def transform(self, tFn): """ Applies a function that transforms the underlying DataFrame into a new DataFrame @@ -169,7 +190,7 @@ def transform(self, tFn): # apply the lambda to the underlying DF dfFn = self._wrapTransformation(tFn) - + return self._replaceRdd(self._jvmRdd.transformDataFrame(dfFn)) @@ -224,7 +245,7 @@ def _destClassSuffix(self, destClass): else: raise ValueError("No conversion method known for %s." % destClass) - + def pipe(self, cmd, tFormatter, @@ -235,11 +256,11 @@ def pipe(self, flankSize=0): """ Pipes genomic data to a subprocess that runs in parallel using Spark. - + Files are substituted in to the command with a $x syntax. E.g., to invoke a command that uses the first file from the files Seq, use $0. To access the path to the directory where the files are copied, use $root. - + Pipes require the presence of an InFormatterCompanion and an OutFormatter as implicit values. The InFormatterCompanion should be a singleton whose apply method builds an InFormatter given a specific type of GenomicDataset. @@ -265,11 +286,11 @@ def pipe(self, jvm = self.sc._jvm tFormatterClass = get_java_class(getattr(jvm, tFormatter)) - + xFormatterInst = getattr(jvm, xFormatter)() convFnInst = getattr(jvm, convFn)() - + if files is None: files = [] @@ -294,7 +315,7 @@ def broadcastRegionJoin(self, genomicRdd, flankSize=0): used for this join is the reference region overlap function. Since this is an inner join, all values who do not overlap a value from the other RDD are dropped. - + :param GenomicDataset genomicRdd: The right RDD in the join. :param int flankSize: Sets a flankSize for the distance between elements to be joined. If set to 0, an overlap is required to join two elements. @@ -310,7 +331,7 @@ def broadcastRegionJoin(self, genomicRdd, flankSize=0): def rightOuterBroadcastRegionJoin(self, genomicRdd, flankSize=0): """ Performs a broadcast right outer join between this RDD and another RDD. - + In a broadcast join, the left RDD (this RDD) is collected to the driver, and broadcast to all the nodes in the cluster. The key equality function used for this join is the reference region overlap function. 
@@ -357,7 +378,7 @@ def broadcastRegionJoinAndGroupByRight(self, genomicRdd, flankSize=0):
 
     def rightOuterBroadcastRegionJoinAndGroupByRight(self, genomicRdd, flankSize=0):
         """
         Performs a broadcast right outer join between this RDD and another RDD.
-        
+
         In a broadcast join, the left side of the join (broadcastTree) is broadcast
         to to all the nodes in the cluster. The key equality function
         used for this join is the reference region overlap function. Since this
@@ -373,7 +394,7 @@ def rightOuterBroadcastRegionJoinAndGroupByRight(self, genomicRdd, flankSize=0):
         overlapped in the genomic coordinate space, and all keys from the right
         RDD that did not overlap a key in the left RDD.
         """
-        
+
         return GenomicDataset(self._jvmRdd.rightOuterBroadcastRegionJoinAndGroupByRight(genomicRdd._jvmRdd,
                                                                                         flankSize),
                               self.sc)
@@ -544,18 +565,21 @@ def shuffleRegionJoinAndGroupByLeft(self, genomicRdd, flankSize=0):
 
         return GenomicDataset(self._jvmRdd.shuffleRegionJoinAndGroupByLeft(genomicRdd._jvmRdd,
                                                                            flankSize),
                               self.sc)
-    
+
     def toDF(self):
         """
+        Converts this GenomicDataset into a DataFrame.
 
         :return: Returns a dataframe representing this RDD.
         """
-        
+
         return DataFrame(self._jvmRdd.toDF(), SQLContext(self.sc))
-
-
-class VCFSupportingGenomicDataset(GenomicDataset):
 
+class VCFSupportingGenomicDataset(GenomicDataset):
+    """
+    Wraps a GenomicDataset with VCF metadata.
+    """
+
     def __init__(self, jvmRdd, sc):
         """
         Constructs a Python GenomicDataset from a JVM GenomicDataset.
@@ -575,7 +599,7 @@ def _javaType(self, lineType):
 
         :param lineType: A Python type.
         """
-        
+
         jvm = self.sc._jvm
 
         if lineType == str:
@@ -595,7 +619,7 @@ def _javaType(self, lineType):
         else:
             raise ValueError('Invalid type {}. Supported types are str, int, float, chr, bool'.format(lineType))
 
-    
+
     def addFixedArrayFormatHeaderLine(self,
                                       name,
@@ -708,7 +732,7 @@ def addAllAlleleArrayFormatHeaderLine(self,
         return self._replaceRdd(self._jvmRdd.addAllAlleleArrayFormatHeaderLine(name,
                                                                                description,
                                                                                self._javaType(lineType)))
-    
+
     def addFixedArrayInfoHeaderLine(self,
                                     name,
@@ -813,9 +837,11 @@ def addFilterHeaderLine(self,
         return self._replaceRdd(self._jvmRdd.addFilterHeaderLine(name,
                                                                  description))
 
-
-class AlignmentRecordRDD(GenomicDataset):
 
+class AlignmentRecordRDD(GenomicDataset):
+    """
+    Wraps a GenomicDataset with Alignment Record metadata and functions.
+    """
 
     def __init__(self, jvmRdd, sc):
         """
@@ -838,8 +864,8 @@ def _replaceRdd(self, newRdd):
 
     def _inferConversionFn(self, destClass):
         return "org.bdgenomics.adam.api.java.AlignmentRecordsTo%s" % self._destClassSuffix(destClass)
-
-
+
+
     def toFragments(self):
         """
         Convert this set of reads into fragments.
@@ -906,7 +932,7 @@ def saveAsSam(self,
 
         self._jvmRdd.saveAsSam(filePath, fileType, asSingleFile, isSorted)
-    
+
     def saveAsSamString(self):
         """
         Converts an RDD into the SAM spec string it represents.
@@ -918,10 +944,10 @@ def saveAsSamString(self):
         :return: A string on the driver representing this RDD of reads in SAM format.
         :rtype: str
         """
-        
+
         return self._jvmRdd.saveAsSamString()
-    
+
     def countKmers(self, kmerLength):
         """
         Cuts reads into _k_-mers, and then counts the number of occurrences of each _k_-mer.
@@ -941,22 +967,22 @@ def sortReadsByReferencePosition(self):
         Sorts reads by the location where they are aligned. Unaligned reads are
         put at the end and sorted by read name. Contigs are ordered
-        lexicographically by name. 
+        lexicographically by name.
 
         :return: Returns a new RDD containing sorted reads.
        :rtype: bdgenomics.adam.rdd.AlignmentRecordRDD
         """
-        
+
         return AlignmentRecordRDD(self._jvmRdd.sortReadsByReferencePosition(),
                                   self.sc)
-    
+
     def sortReadsByReferencePositionAndIndex(self):
         """
         Sorts our read data by reference positions, with contigs ordered by index.
 
         Sorts reads by the location where they are aligned. Unaligned reads are
-        put at the end and sorted by read name. Contigs are ordered by index 
+        put at the end and sorted by read name. Contigs are ordered by index
         that they are ordered in the sequence metadata.
 
         :return: Returns a new RDD containing sorted reads.
@@ -990,11 +1016,11 @@ def recalibrateBaseQualities(self,
         :param bdgenomics.adam.rdd.VariantRDD knownSnps: A table of known SNPs to mask valid variants.
         :param bdgenomics.adam.stringency validationStringency:
         """
-        
+
         return AlignmentRecordRDD(self._jvmRdd.recalibrateBaseQualities(knownSnps._jvmRdd,
                                                                         _toJava(validationStringency, self.sc._jvm)))
-    
+
     def realignIndels(self,
                       isSorted = False,
                       maxIndelSize = 500,
@@ -1003,7 +1029,7 @@ def realignIndels(self,
                       maxTargetSize = 3000):
         """
         Realigns indels using a concensus-based heuristic.
-        
+
         Generates consensuses from reads.
 
         :param bool isSorted: If the input data is sorted, setting this
@@ -1039,7 +1065,7 @@ def realignIndels(self,
                       maxTargetSize = 3000):
         """
         Realigns indels using a concensus-based heuristic.
-        
+
         Generates consensuses from prior called INDELs.
 
         :param bdgenomics.adam.rdd.VariantRDD knownIndels: An RDD of previously
@@ -1085,7 +1111,7 @@ def saveAsPairedFastq(self,
                           validationStringency = LENIENT):
         """
         Saves these AlignmentRecords to two FASTQ files.
-        
+
         The files are one for the first mate in each pair, and the other for the
         second mate in the pair.
 
@@ -1102,7 +1128,7 @@ def saveAsPairedFastq(self,
         :param pyspark.storagelevel.StorageLevel persistLevel: The persistance level to cache reads at between passes.
         """
-        
+
         self._jvmRdd.saveAsPairedFastq(fileName1, fileName2,
                                        outputOriginalBaseQualities,
                                        _toJava(validationStringency, self.sc._jvm),
@@ -1128,7 +1154,7 @@ def saveAsFastq(self,
         false, writes out reads with the base qualities from the qual field.
         Default is false.
         """
-        
+
         self._jvmRdd.saveAsFastq(fileName,
                                  outputOriginalBaseQualities,
                                  sort,
@@ -1152,14 +1178,16 @@ def reassembleReadPairs(self,
         :return: Returns an RDD with the pair information recomputed.
         :rtype: bdgenomics.adam.rdd.AlignmentRecordRDD
         """
-        
+
         return AlignmentRecordRDD(self._jvmRdd.reassembleReadPairs(rdd._jrdd,
                                                                    _toJava(validationStringency, self.sc._jvm)),
                                   self.sc)
 
 
 class CoverageRDD(GenomicDataset):
-
+    """
+    Wraps a GenomicDataset with Coverage metadata and functions.
+    """
 
     def _replaceRdd(self, newRdd):
@@ -1206,7 +1234,7 @@ def collapse(self):
 
         return CoverageRDD(self._jvmRdd.collapse(), self.sc)
-    
+
     def toFeatures(self):
         """
         Converts CoverageRDD to FeatureRDD.
@@ -1214,7 +1242,7 @@ def toFeatures(self):
         :return: Returns a FeatureRDD from CoverageRDD.
         :rtype: bdgenomics.adam.rdd.FeatureRDD
         """
-        
+
         return FeatureRDD(self._jvmRdd.toFeatures(), self.sc)
 
@@ -1269,7 +1297,9 @@ def _inferConversionFn(self, destClass):
 
 
 class FeatureRDD(GenomicDataset):
-
+    """
+    Wraps a GenomicDataset with Feature metadata and functions.
+    """
 
     def _replaceRdd(self, newRdd):
@@ -1308,7 +1338,7 @@ def save(self, filePath, asSingleFile = False, disableFastConcat = False):
 
         self._jvmRdd.save(filePath, asSingleFile, disableFastConcat)
-    
+
     def toCoverage(self):
         """
         Converts the FeatureRDD to a CoverageRDD.
@@ -1326,7 +1356,9 @@ def _inferConversionFn(self, destClass):
 
 
 class FragmentRDD(GenomicDataset):
-
+    """
+    Wraps a GenomicDataset with Fragment metadata and functions.
+    """
 
     def _replaceRdd(self, newRdd):
@@ -1345,11 +1377,11 @@ def __init__(self, jvmRdd, sc):
 
         GenomicDataset.__init__(self, jvmRdd, sc)
-    
+
     def toReads(self):
         """
         Splits up the reads in a Fragment, and creates a new RDD.
-        
+
         :return: Returns this RDD converted back to reads.
         :rtype: bdgenomics.adam.rdd.AlignmentRecordRDD
         """
@@ -1385,7 +1417,9 @@ def _inferConversionFn(self, destClass):
 
 
 class GenotypeRDD(VCFSupportingGenomicDataset):
-
+    """
+    Wraps a GenomicDataset with Genotype metadata and functions.
+    """
 
     def _replaceRdd(self, newRdd):
@@ -1437,15 +1471,17 @@ def toVariants(self, dedupe=False):
         """
         return VariantRDD(self._jvmRdd.toVariants(dedupe), self.sc)
-    
-    
+
+
     def _inferConversionFn(self, destClass):
         return "org.bdgenomics.adam.api.java.GenotypesTo%s" % self._destClassSuffix(destClass)
 
 
 class NucleotideContigFragmentRDD(GenomicDataset):
-
+    """
+    Wraps a GenomicDataset with Nucleotide Contig Fragment metadata and functions.
+    """
 
     def _replaceRdd(self, newRdd):
@@ -1498,7 +1534,7 @@ def flankAdjacentFragments(self, flankLength):
     def countKmers(self, kmerLength):
         """
         Counts the k-mers contained in a FASTA contig.
-        
+
         :param int kmerLength: The value of _k_ to use for cutting _k_-mers.
         :return: Returns an RDD containing k-mer/count pairs.
         :rtype: pyspark.rdd.RDD[str,long]
@@ -1513,7 +1549,9 @@ def _inferConversionFn(self, destClass):
 
 
 class VariantRDD(VCFSupportingGenomicDataset):
-
+    """
+    Wraps a GenomicDataset with Variant metadata and functions.
+    """
 
     def _replaceRdd(self, newRdd):
@@ -1537,10 +1575,10 @@ def toVariantContexts(self):
         """
         :return: These variants, converted to variant contexts.
         """
-        
+
         vcs = self._jvmRdd.toVariantContexts()
         return VariantContextRDD(vcs, self.sc)
-    
+
     def saveAsParquet(self, filePath):
         """
@@ -1556,9 +1594,11 @@ def _inferConversionFn(self, destClass):
         return "org.bdgenomics.adam.api.java.VariantsTo%s" % self._destClassSuffix(destClass)
 
-
-class VariantContextRDD(VCFSupportingGenomicDataset):
 
+class VariantContextRDD(VCFSupportingGenomicDataset):
+    """
+    Wraps a GenomicDataset with Variant Context metadata and functions.
+    """
 
     def _replaceRdd(self, newRdd):
@@ -1576,7 +1616,7 @@ def __init__(self, jvmRdd, sc):
         """
         VCFSupportingGenomicDataset.__init__(self, jvmRdd, sc)
-    
+
     def saveAsVcf(self,
                   filePath,
diff --git a/adam-python/bdgenomics/adam/stringency.py b/adam-python/bdgenomics/adam/stringency.py
index 6c043c51f8..5379e1bc2a 100644
--- a/adam-python/bdgenomics/adam/stringency.py
+++ b/adam-python/bdgenomics/adam/stringency.py
@@ -15,16 +15,37 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+r"""
+==========
+stringency
+==========
+.. currentmodule:: bdgenomics.adam.stringency
+.. autosummary::
+   :toctree: _generate/
+
+   STRICT
+   LENIENT
+   SILENT
+"""
 
 STRICT = 2
+"""
+ htsjdk.samtools.ValidationStringency.STRICT
+"""
 LENIENT = 1
+"""
+ htsjdk.samtools.ValidationStringency.LENIENT
+"""
 SILENT = 0
+"""
+ htsjdk.samtools.ValidationStringency.SILENT
+"""
 
 
 def _toJava(stringency, jvm):
     """
     Converts to an HTSJDK ValidationStringency enum.
-    
+
     Should not be called from user code.
 
     :param bdgenomics.adam.stringency stringency: The desired stringency level.
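Taken together, the modules patched above compose end to end from PySpark. A minimal sketch, assuming an active PySpark session and a hypothetical ``sample.bam`` on an accessible filesystem:

.. code:: python

    from pyspark.sql import SparkSession
    from bdgenomics.adam.adamContext import ADAMContext
    from bdgenomics.adam.stringency import LENIENT

    # ADAMContext wraps the SparkContext behind an active SparkSession
    ss = SparkSession.builder.getOrCreate()
    ac = ADAMContext(ss)

    # format is inferred from the file extension; LENIENT tolerates
    # malformed records where STRICT would fail the load
    reads = ac.loadAlignments("sample.bam", stringency=LENIENT)

    # GenomicDataset.toDF exposes the records as a Spark SQL DataFrame
    reads.toDF().show(5)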
diff --git a/docs/api/genomicRdd.rst b/docs/api/genomicRdd.rst
index 233ed35ea8..a30531b2bb 100644
--- a/docs/api/genomicRdd.rst
+++ b/docs/api/genomicRdd.rst
@@ -154,21 +154,21 @@ Similar to ``transform``/``transformDataset``, there exists a
 Using partitioned Parquet to speed up range based queries
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-GenomicRDDs of types ``AlignmentRecordRDD``, ``GenotypeRDD``, 
-``VariantRDD``, and ``NucleotideFragmentContigRDD`` can be written as Parquet 
-using a Hive-style hierarchical directory scheme that is based on contig and 
+GenomicRDDs of types ``AlignmentRecordRDD``, ``GenotypeRDD``,
+``VariantRDD``, and ``NucleotideFragmentContigRDD`` can be written as Parquet
+using a Hive-style hierarchical directory scheme that is based on contig and
 genomic position. This partitioning reduces the latency of genomic range queries
 against these datasets, which is particularly important for interactive
 applications such as a genomic browser backed by an ADAM dataset.
 
-The genomicRDD function 
-``GenomicRDD.filterByOverlappingRegions(queryRegionsList)`` builds a Spark SQL 
+The genomicRDD function
+``GenomicRDD.filterByOverlappingRegions(queryRegionsList)`` builds a Spark SQL
 query that uses this partitioning scheme. This can reduce latencies by more
-than 20x when repeatedly querying a datset with genomic range filters. 
+than 20x when repeatedly querying a dataset with genomic range filters.
 On a high coverage alignment dataset, this partitioning strategy improved
 latency from 1-2 minutes to 1-3 seconds when looking up genomic ranges.
 
-**Saving partitioned parquet files to disk** 
+**Saving partitioned parquet files to disk**
 
 A ``GenomicRDD`` can be written to disk as a partitioned Parquet dataset with the
 ``GenomicRDD`` function ``saveAsPartitionedParquet``. The optional
@@ -176,7 +176,8 @@ A ``GenomicRDD`` can be written to disk as a partitioned Parquet dataset with th
 within each contig.
 
 .. code:: scala
-  data.saveAsPartitionedParquet("dataset1.adam", partitionSize = 2000000)
+
+  data.saveAsPartitionedParquet("dataset1.adam", partitionSize = 2000000)
 
 A partitioned dataset can also be created from an input Parquet or SAM/BAM/CRAM
 file using the ADAM ``transformAlignments`` CLI, or Parquet/VCF files using the
@@ -199,29 +200,29 @@ Within each contigName directory, there are subdirectories named using a comput
 ``positionBin``, for example a subdirectory named ``positionBin=5``. Records from the
 dataset are written into Parquet files within the appropriate positionBin directory,
 computed based on the start position of the record using the calculation
 ``floor(start / partitionSize)``.
-For example, when using the default ``partitionSize`` of 1,000,000 base pairs, an 
-alignment record with start position 20,100,000 on chromosome 22 would be found in a 
+For example, when using the default ``partitionSize`` of 1,000,000 base pairs, an
+alignment record with start position 20,100,000 on chromosome 22 would be found in a
 Parquet file at the path ``mydataset.adam/contigName=22/positionBin=20``. The splitting
-of data into one or more Parquet fields in these leaf directories is automatic based on 
+of data into one or more Parquet files in these leaf directories is automatic based on
 Parquet block size settings.
 
-.. code::
+.. code::
 
     mySavedAdamDataset.adam
     |
    |-- _partitionedByStartPos
-    L-- contigName=1 
+    L-- contigName=1
        L-- positionBin=0
            |-- part-r-00001.parquet
            +-- part-r-00002.parquet
        L-- positionBin=1
            |-- part-r-00003.parquet
-           |-- part-r-00004.parquet 
-       L-- positionBin= ( N bins ...) 
+           |-- part-r-00004.parquet
+       L-- positionBin= ( N bins ...)
    L-- contigName= ( N contigs ... )
        |-- (N bins ... )
-        
-        
+
+
 The existence of the file ``_partitionedByStartPos`` can be tested with the public
 function ``ADAMContext.isPartitioned(path)`` and can be used to determine explicitly
 if an ADAM Parquet dataset is partitioned using this scheme. The partition size which was used
@@ -238,7 +239,7 @@ SQL queries on a ``genomicRDD.dataset`` backed by partitioned Parquet.
 **Re-using a previously loaded partitioned dataset:**
 
 When a partitioned dataset is first created within an ADAM session, a partition
-discovery/initialization step is performed that can take several minutes for large datasets. 
+discovery/initialization step is performed that can take several minutes for large datasets.
 The original GenomicRDD object can then be re-used multiple times as the parent
 of different filtration and processing transformations and actions, without
 incurring this initializiation cost again. Thus, re-use of a parent partitioned ``GenomicRDD``
diff --git a/docs/api/overview.rst b/docs/api/overview.rst
index 0e8e65fe74..84314a2303 100644
--- a/docs/api/overview.rst
+++ b/docs/api/overview.rst
@@ -51,10 +51,11 @@ ADAM's Python API wraps the `ADAMContext `__ and
 `GenomicRDD `__ APIs so they can be used from PySpark. The
 Python API is feature complete relative to ADAM's Java API.
 
+`Read more about the Python API. `__
+
 The ADAM R API
 --------------
 
 ADAM's R API wraps the `ADAMContext `__ and
 `GenomicRDD `__ APIs so they can be used from SparkR. The
 R API is feature complete relative to ADAM's Java API.
-
diff --git a/docs/api/python.rst b/docs/api/python.rst
new file mode 100644
index 0000000000..e0304e3309
--- /dev/null
+++ b/docs/api/python.rst
@@ -0,0 +1,4 @@
+ADAM Python Documentation
+=========================
+
+.. automodule:: bdgenomics.adam
diff --git a/docs/conf.py b/docs/conf.py
index b166a2a9ea..7098c7234a 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -21,7 +21,7 @@
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
-sys.path.insert(0, os.path.abspath('../src'))
+sys.path.insert(0, os.path.abspath('../adam-python'))
 
 def real_dir_name(p, n=1):
     p = os.path.realpath(p)
@@ -45,6 +45,7 @@ def real_dir_name(p, n=1):
 # ones.
 extensions = [
     'sphinx.ext.autodoc',
+    'sphinx.ext.autosummary',
     'sphinx.ext.doctest',
     'sphinx.ext.todo',
     'sphinx.ext.coverage',
@@ -53,6 +54,11 @@ def real_dir_name(p, n=1):
     'sphinx.ext.mathjax',
 ]
 
+extlinks = {
+    'issue': ('https://github.com/bigdatagenomics/adam/issues/%s', '#'),
+    'pr': ('https://github.com/bigdatagenomics/adam/pull/%s', 'PR #'),
+}
+
 intersphinx_mapping = {
     'python': ('https://docs.python.org/2', None),
 }
@@ -73,6 +79,7 @@ def skip(app, what, name, obj, skip, options):
 def setup(app):
     app.connect('autodoc-skip-member', skip)
 
+autosummary_generate = True
 
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ['_templates']
@@ -154,12 +161,9 @@ def setup(app):
 
 # The theme to use for HTML and HTML Help pages.  See the documentation for
 # a list of builtin themes.
-html_theme = 'alabaster'
+html_theme = 'sphinx_rtd_theme'
 
 html_theme_options = {
-    "github_banner": True,
-    "github_user": "bigdatagenomics",
-    "github_repo": "adam",
-    "caption_font_size": "24px"
+    'collapse_navigation': True,
 }
 
 # Theme options are theme-specific and customize the look and feel of a theme
diff --git a/docs/index.rst b/docs/index.rst
index baafbc4ee9..93ff88d41d 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -102,6 +102,7 @@ For more, please see our `awesome list of applications
 `__, enable the
 
 .. code:: bash
 
-    mvn -Ppython package
+    mvn -P python package
 
 This will enable the ``adam-python`` module as part of the ADAM build.
 This module uses Maven to invoke a Makefile that builds a Python egg and
@@ -63,14 +63,24 @@
 runs tests. To build this module, we require either an active
 `Conda `__ or
 `virtualenv `__
 environment.
 
+ADAM can run on both Python 2 and Python 3.
+
 `To setup and activate a Conda
-environment `__, run:
+environment `__ for Python 2.7, run:
 
 .. code:: bash
 
     conda create -n adam python=2.7 anaconda
     source activate adam
 
+`To setup and activate a Conda
+environment `__ for Python 3.6, run:
+
+.. code:: bash
+
+    conda create -n adam python=3.6 anaconda
+    source activate adam
+
+
 `To setup and activate a virtualenv
 environment `__, run:
diff --git a/docs/requirements.txt b/docs/requirements.txt
index 4ee8d0237b..4fbce4d9f6 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1 +1 @@
-sphinx==1.5.6
+sphinx==1.7.7
diff --git a/docs/style.css b/docs/style.css
deleted file mode 100644
index 16ad12f5bb..0000000000
--- a/docs/style.css
+++ /dev/null
@@ -1,71 +0,0 @@
-
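The range-query pattern described in ``docs/api/genomicRdd.rst`` above also has a Python-side counterpart in ``ADAMContext.loadIndexedBam``, which takes ``ReferenceRegion`` view regions rather than scanning a whole file. A sketch under the same assumptions as the earlier example, with a hypothetical indexed ``sample.bam``; the regions shown are illustrative:

.. code:: python

    from pyspark.sql import SparkSession
    from bdgenomics.adam.adamContext import ADAMContext
    from bdgenomics.adam.models import ReferenceRegion

    ss = SparkSession.builder.getOrCreate()
    ac = ADAMContext(ss)

    # restrict the load to two genomic intervals; each region is translated
    # to a JVM ReferenceRegion before the indexed BAM lookup
    regions = [ReferenceRegion("1", 100000, 200000),
               ReferenceRegion("2", 500000, 600000)]

    reads = ac.loadIndexedBam("sample.bam", regions)
    print(reads.toDF().count())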