diff --git a/236_NGSpeciesID/Biopython-1.83-foss-2023a.eb b/236_NGSpeciesID/Biopython-1.83-foss-2023a.eb
new file mode 100644
index 00000000..4c021941
--- /dev/null
+++ b/236_NGSpeciesID/Biopython-1.83-foss-2023a.eb
@@ -0,0 +1,45 @@
+##
+# Author: Robert Mijakovic
+##
+easyblock = 'PythonPackage'
+
+name = 'Biopython'
+version = '1.83'
+
+homepage = 'https://www.biopython.org'
+description = """Biopython is a set of freely available tools for biological
+ computation written in Python by an international team of developers. It is
+ a distributed collaborative effort to develop Python libraries and
+ applications which address the needs of current and future work in
+ bioinformatics. """
+
+toolchain = {'name': 'foss', 'version': '2023a'}
+
+source_urls = ['https://biopython.org/DIST']
+sources = [SOURCELOWER_TAR_GZ]
+checksums = ['78e6bfb78de63034037afd35fe77cb6e0a9e5b62706becf78a7d922b16ed83f7']
+
+dependencies = [
+    ('Python', '3.11.3'),
+    ('SciPy-bundle', '2023.07'),
+]
+
+download_dep_fail = True
+use_pip = True
+sanity_pip_check = True
+
+# Run only tests that don't require internet connection
+runtest = 'python setup.py test --offline'
+
+sanity_check_paths = {
+    'files': [],
+    'dirs': ['lib/python%(pyshortver)s/site-packages/Bio',
+             'lib/python%(pyshortver)s/site-packages/BioSQL']
+}
+
+# extra check to ensure numpy dependency is available
+sanity_check_commands = ["python -c 'import Bio.MarkovModel'"]
+
+options = {'modulename': 'Bio'}
+
+moduleclass = 'bio'
diff --git a/236_NGSpeciesID/Pysam-0.22.0-GCC-12.3.0.eb b/236_NGSpeciesID/Pysam-0.22.0-GCC-12.3.0.eb
new file mode 100644
index 00000000..27c7b71f
--- /dev/null
+++ b/236_NGSpeciesID/Pysam-0.22.0-GCC-12.3.0.eb
@@ -0,0 +1,32 @@
+easyblock = 'PythonPackage'
+
+name = 'Pysam'
+version = '0.22.0'
+
+homepage = 'https://github.com/pysam-developers/pysam'
+description = """Pysam is a python module for reading and manipulating Samfiles.
+ It's a lightweight wrapper of the samtools C-API. Pysam also includes an interface for tabix."""
+
+toolchain = {'name': 'GCC', 'version': '12.3.0'}
+
+source_urls = ['https://github.com/pysam-developers/pysam/archive/']
+sources = ['v%(version)s.tar.gz']
+checksums = ['61b3377c5f889ddc6f6979912c3bb960d7e08407dada9cb38f13955564ea036f']
+
+builddependencies = [('Python-bundle-PyPI', '2023.06')]
+
+dependencies = [
+    ('Python', '3.11.3'),
+    ('ncurses', '6.4'),
+    ('cURL', '8.0.1'),
+    ('XZ', '5.4.2'),
+]
+
+# https://github.com/pysam-developers/pysam/issues/1146#issuecomment-1309421979
+preinstallopts = """export CFLAGS="-fPIC $CFLAGS" && """
+download_dep_fail = True
+use_pip = True
+
+sanity_pip_check = True
+
+moduleclass = 'bio'
diff --git a/236_NGSpeciesID/Racon-1.5.0-GCCcore-12.3.0.eb b/236_NGSpeciesID/Racon-1.5.0-GCCcore-12.3.0.eb
new file mode 100644
index 00000000..f1a61f59
--- /dev/null
+++ b/236_NGSpeciesID/Racon-1.5.0-GCCcore-12.3.0.eb
@@ -0,0 +1,29 @@
+easyblock = 'CMakeMake'
+
+name = 'Racon'
+version = '1.5.0'
+
+homepage = 'https://github.com/lbcb-sci/racon'
+description = """Ultrafast consensus module for raw de novo genome assembly of long uncorrected reads."""
+
+toolchain = {'name': 'GCCcore', 'version': '12.3.0'}
+
+github_account = 'lbcb-sci'
+source_urls = [GITHUB_SOURCE]
+sources = ['%(version)s.tar.gz']
+checksums = ['41e362f71cc03b934f17d6e2c0d626e1b2997258261b14551586de006666424a']
+
+builddependencies = [
+    ('CMake', '3.26.3'),
+    ('binutils', '2.40'),
+    # ('git', '2.38.1', '-nodocs'),
+]
+
+sanity_check_paths = {
+    'files': ['bin/racon'],
+    'dirs': [],
+}
+
+sanity_check_commands = ['racon --help']
+
+moduleclass = 'bio'
diff --git a/236_NGSpeciesID/edlib-1.3.9-GCC-12.3.0.eb b/236_NGSpeciesID/edlib-1.3.9-GCC-12.3.0.eb
new file mode 100644
index 00000000..87cdf99d
--- /dev/null
+++ b/236_NGSpeciesID/edlib-1.3.9-GCC-12.3.0.eb
@@ -0,0 +1,24 @@
+easyblock = 'PythonPackage'
+
+name = 'edlib'
+version = '1.3.9'
+
+homepage = 'https://martinsos.github.io/edlib'
+description = "Lightweight, super fast library for sequence alignment using edit (Levenshtein) distance."
+
+toolchain = {'name': 'GCC', 'version': '12.3.0'}
+
+sources = [SOURCE_TAR_GZ]
+checksums = ['64c3dfab3ebe3e759565a0cc71eb4df23cf3ce1713fd558af3c473dddc2a3766']
+
+dependencies = [
+    ('Python', '3.11.3'),
+]
+
+download_dep_fail = True
+use_pip = True
+sanity_pip_check = True
+
+moduleclass = 'bio'
+
+# TODO: build fails with "edlib.bycython.cpp:198:12: fatal error: longintrepr.h: No such file or directory"
diff --git a/236_NGSpeciesID/intervaltree-python-3.1.0-GCCcore-12.3.0.eb b/236_NGSpeciesID/intervaltree-python-3.1.0-GCCcore-12.3.0.eb
new file mode 100644
index 00000000..cd94d62e
--- /dev/null
+++ b/236_NGSpeciesID/intervaltree-python-3.1.0-GCCcore-12.3.0.eb
@@ -0,0 +1,35 @@
+# Author: Jasper Grimm (UoY)
+# Update: Petr Král (INUITS)
+
+easyblock = 'PythonPackage'
+
+name = 'intervaltree-python'
+_modname = 'intervaltree'
+version = '3.1.0'
+
+homepage = 'https://github.com/chaimleib/intervaltree'
+description = """A mutable, self-balancing interval tree. Queries may be by
+ point, by range overlap, or by range containment.
+"""
+
+toolchain = {'name': 'GCCcore', 'version': '12.3.0'}
+
+source_urls = ['https://pypi.python.org/packages/source/i/%s' % _modname]
+sources = ['%s-%s.tar.gz' % (_modname, version)]
+checksums = ['902b1b88936918f9b2a19e0e5eb7ccb430ae45cde4f39ea4b36932920d33952d']
+
+builddependencies = [('binutils', '2.40')]
+
+dependencies = [
+    ('Python', '3.11.3'),
+    # NOTE(review): expected to provide sortedcontainers, which intervaltree requires - confirm
+    ('Python-bundle-PyPI', '2023.06'),
+]
+
+use_pip = True
+download_dep_fail = True
+sanity_pip_check = True
+
+options = {'modulename': _modname}
+
+moduleclass = 'bio'
diff --git a/236_NGSpeciesID/medaka-1.11.3-foss-2023a.eb b/236_NGSpeciesID/medaka-1.11.3-foss-2023a.eb
new file mode 100644
index 00000000..f958152b
--- /dev/null
+++ b/236_NGSpeciesID/medaka-1.11.3-foss-2023a.eb
@@ -0,0 +1,71 @@
+# This is a contribution from HPCNow! (http://hpcnow.com)
+# Copyright:: HPCNow!
+# Authors:: Danilo Gonzalez
+# License:: GPL-v3.0
+# Updated to foss-2020b to use with artic tool
+# J. Sassmannshausen (GSTT/NHS UK)
+# Updated to 1.5.0
+# Jasper Grimm (UoY)
+# Updated: Petr Král (INUITS)
+
+easyblock = 'PythonBundle'
+
+name = 'medaka'
+version = '1.11.3'
+
+homepage = 'https://github.com/nanoporetech/medaka'
+description = "medaka is a tool to create a consensus sequence from nanopore sequencing data."
+
+toolchain = {'name': 'foss', 'version': '2023a'}
+toolchainopts = {'pic': True}
+
+builddependencies = [('Autotools', '20220317')]
+
+_minimap_ver = '2.26'
+dependencies = [
+    ('Python', '3.11.3'),
+    ('Python-bundle-PyPI', '2023.06'),  # includes cffi
+    # tensorflow~=2.10.0 required by medaka 1.9.1, see requirements.txt
+    ('TensorFlow', '2.13.0'),
+    ('Pysam', '0.22.0'),  # version of the Pysam easyconfig added alongside this one
+    ('SAMtools', '1.18'),
+    ('minimap2', _minimap_ver),
+    ('HTSlib', '1.18'),  # for tabix, bgzip
+    ('Racon', '1.5.0'),  # TODO
+    ('edlib', '1.3.9'),  # TODO
+    ('pyspoa', '0.2.1'),
+    ('python-parasail', '1.3.4'),  # TODO
+    ('ont-fast5-api', '4.1.2'),  # version of the ont-fast5-api easyconfig added alongside this one
+    ('WhatsHap', '2.1'),  # TODO
+    ('intervaltree-python', '3.1.0'),  # TODO
+    ('BCFtools', '1.18'),
+]
+
+use_pip = True
+sanity_pip_check = True
+
+exts_list = [
+    ('mappy', _minimap_ver, {
+        'checksums': ['e53fbe9a3ea8762a64b8103f4f779c9fb16d418eaa0a731f45cebc83867a9b71'],
+    }),
+    ('wurlitzer', '3.0.3', {
+        'checksums': ['224f5fe70618be3872c05dfddc8c457191ec1870654596279fcc1edadebe3e5b'],
+    }),
+    (name, version, {
+        'checksums': ['4440762a17ddd66806ddbd7c3218140caa234b96a8c919ed54d7243d3e4a5dd1'],
+        # Some requirements are too strict.
+        'preinstallopts': "sed -i 's/tensorflow.*/tensorflow/g;s/cffi==/cffi>=/g' requirements.txt && ",
+    }),
+]
+
+sanity_check_paths = {
+    'files': ['bin/medaka', 'bin/medaka_consensus', 'bin/medaka_version_report'],
+    'dirs': ['lib/python%(pyshortver)s/site-packages'],
+}
+
+sanity_check_commands = [
+    "medaka --help",
+    "medaka_version_report",
+]
+
+moduleclass = 'bio'
diff --git a/236_NGSpeciesID/minimap2-2.26-GCCcore-12.3.0.eb b/236_NGSpeciesID/minimap2-2.26-GCCcore-12.3.0.eb
new file mode 100644
index 00000000..180de610
--- /dev/null
+++ b/236_NGSpeciesID/minimap2-2.26-GCCcore-12.3.0.eb
@@ -0,0 +1,54 @@
+# This file is an EasyBuild reciPY as per https://github.com/easybuilders/easybuild
+# Adam Huffman
+# DeepThought, Flinders University
+# Updated to 2.22
+# R.QIAO
+
+# Update Petr Král (INUITS)
+easyblock = 'MakeCp'
+
+name = 'minimap2'
+version = '2.26'
+
+homepage = 'https://github.com/lh3/minimap2'
+description = """Minimap2 is a fast sequence mapping and alignment
+program that can find overlaps between long noisy reads, or map long
+reads or their assemblies to a reference genome optionally with detailed
+alignment (i.e. CIGAR). At present, it works efficiently with query
+sequences from a few kilobases to ~100 megabases in length at an error
+rate ~15%. Minimap2 outputs in the PAF or the SAM format. On limited
+test data sets, minimap2 is over 20 times faster than most other
+long-read aligners. It will replace BWA-MEM for long reads and contig
+alignment."""
+
+toolchain = {'name': 'GCCcore', 'version': '12.3.0'}
+
+source_urls = ['https://github.com/lh3/%(name)s/releases/download/v%(version)s/']
+sources = ['%(name)s-%(version)s.tar.bz2']
+checksums = ['6a588efbd273bff4f4808d5190957c50272833d2daeb4407ccf4c1b78143624c']
+
+builddependencies = [('binutils', '2.40')]
+
+dependencies = [('zlib', '1.2.13')]
+
+buildopts = 'CC="${CC}" CFLAGS="${CFLAGS}" INCLUDES="${CPPFLAGS}"'
+
+files_to_copy = [
+    (['%(name)s'], 'bin'),
+    (['lib%(name)s.a'], 'lib'),
+    (['*.h'], 'include'),
+    'LICENSE.txt', 'NEWS.md', 'README.md',
+    (['%(name)s.1'], 'share/man/man1')
+]
+
+sanity_check_paths = {
+    'files': ['bin/%(name)s', 'lib/lib%(name)s.a'],
+    'dirs': ['include']
+}
+
+sanity_check_commands = [
+    "minimap2 --help",
+    "cd %(builddir)s/minimap2-%(version)s && minimap2 -a test/MT-human.fa test/MT-orang.fa > test.sam",
+]
+
+moduleclass = 'bio'
diff --git a/236_NGSpeciesID/ont-fast5-api-4.1.2-foss-2023a.eb b/236_NGSpeciesID/ont-fast5-api-4.1.2-foss-2023a.eb
new file mode 100644
index 00000000..98a6063a
--- /dev/null
+++ b/236_NGSpeciesID/ont-fast5-api-4.1.2-foss-2023a.eb
@@ -0,0 +1,43 @@
+easyblock = 'PythonBundle'
+
+name = 'ont-fast5-api'
+version = '4.1.2'
+
+homepage = 'https://github.com/nanoporetech/ont_fast5_api'
+description = "ont_fast5_api is a simple interface to HDF5 files of the Oxford Nanopore .fast5 file format."
+
+toolchain = {'name': 'foss', 'version': '2023a'}
+
+dependencies = [
+    ('Python', '3.11.3'),
+    ('SciPy-bundle', '2023.07'),
+    ('h5py', '3.9.0'),
+]
+
+use_pip = True
+
+exts_list = [
+    ('progressbar33', '2.4', {
+        'modulename': 'progressbar',
+        'checksums': ['51fe0d9b3b4023db2f983eeccdfc8c9846b84db8443b9bee002c7f58f4376eff'],
+    }),
+    (name, version, {
+        'checksums': ['c7c59c6100e992ef8bc239cdf91f7a8ab46abf57ecd689f94b2b98e72a9e9472'],
+    }),
+]
+
+sanity_check_paths = {
+    'files': ['bin/compress_fast5', 'bin/fast5_subset', 'bin/multi_to_single_fast5', 'bin/single_to_multi_fast5'],
+    'dirs': ['lib/python%(pyshortver)s/site-packages'],
+}
+
+sanity_check_commands = [
+    "compress_fast5 --help",
+    "fast5_subset --help",
+    "multi_to_single_fast5 --help",
+    "single_to_multi_fast5 --help",
+]
+
+sanity_pip_check = True
+
+moduleclass = 'bio'
diff --git a/236_NGSpeciesID/parasail-2.6.2-GCC-12.3.0.eb b/236_NGSpeciesID/parasail-2.6.2-GCC-12.3.0.eb
new file mode 100644
index 00000000..35f2284f
--- /dev/null
+++ b/236_NGSpeciesID/parasail-2.6.2-GCC-12.3.0.eb
@@ -0,0 +1,27 @@
+easyblock = 'CMakeMake'
+
+name = 'parasail'
+version = '2.6.2'
+
+homepage = 'https://github.com/jeffdaily/parasail'
+description = """parasail is a SIMD C (C99) library containing implementations
+ of the Smith-Waterman (local), Needleman-Wunsch (global), and semi-global
+ pairwise sequence alignment algorithms. """
+
+toolchain = {'name': 'GCC', 'version': '12.3.0'}
+
+# https://github.com/jeffdaily/parasail
+github_account = 'jeffdaily'
+source_urls = [GITHUB_SOURCE]
+sources = ['v%(version)s.tar.gz']
+checksums = ['9057041db8e1cde76678f649420b85054650414e5de9ea84ee268756c7ea4b4b']
+
+builddependencies = [('CMake', '3.26.3')]
+
+sanity_check_paths = {
+    'files': ['bin/parasail_aligner', 'bin/parasail_stats',
+              'lib/libparasail.%s' % SHLIB_EXT, 'include/parasail.h'],
+    'dirs': [],
+}
+
+moduleclass = 'bio'
diff --git a/236_NGSpeciesID/pyfaidx-0.8.0-GCCcore-12.3.0.eb b/236_NGSpeciesID/pyfaidx-0.8.0-GCCcore-12.3.0.eb
new file mode 100644
index 00000000..d8f51b24
--- /dev/null
+++ b/236_NGSpeciesID/pyfaidx-0.8.0-GCCcore-12.3.0.eb
@@ -0,0 +1,43 @@
+easyblock = 'PythonBundle'
+
+name = 'pyfaidx'
+version = '0.8.0'
+
+homepage = 'https://pypi.python.org/pypi/pyfaidx'
+description = "pyfaidx: efficient pythonic random access to fasta subsequences"
+
+toolchain = {'name': 'GCCcore', 'version': '12.3.0'}
+
+builddependencies = [
+    ('binutils', '2.40'),
+    ('Python-bundle-PyPI', '2023.06'),
+]
+
+dependencies = [('Python', '3.11.3')]
+
+sanity_pip_check = True
+use_pip = True
+
+exts_list = [
+    ('zipp', '3.17.0', {
+        'source_tmpl': SOURCE_PY3_WHL,
+        'checksums': ['0e923e726174922dce09c53c59ad483ff7bbb8e572e00c7f7c46b88556409f31'],
+    }),
+    ('importlib_metadata', '7.0.1', {
+        'source_tmpl': SOURCE_PY3_WHL,
+        'checksums': ['4805911c3a4ec7c3966410053e9ec6a1fecd629117df5adee56dfc9432a1081e'],
+    }),
+    (name, version, {
+        'source_tmpl': SOURCE_PY3_WHL,
+        'checksums': ['0e40f1a45e52131bcc32dc686c7876cc794505e5dd3e3504c3f56cddb5266930'],
+    }),
+]
+
+sanity_check_paths = {
+    'files': ['bin/faidx'],
+    'dirs': ['lib/python%(pyshortver)s/site-packages'],
+}
+
+sanity_check_commands = ["faidx --help"]
+
+moduleclass = 'bio'
diff --git a/236_NGSpeciesID/pyspoa-0.2.1-GCC-12.3.0.eb b/236_NGSpeciesID/pyspoa-0.2.1-GCC-12.3.0.eb
new file mode 100644
index 00000000..96e58f23
--- /dev/null
+++ b/236_NGSpeciesID/pyspoa-0.2.1-GCC-12.3.0.eb
@@ -0,0 +1,62 @@
+easyblock = 'PythonPackage'
+
+name = 'pyspoa'
+version = '0.2.1'
+
+local_cereal_version = '1.3.2'
+
+homepage = 'https://github.com/nanoporetech/pyspoa'
+description = "Python bindings to spoa."
+
+toolchain = {'name': 'GCC', 'version': '12.3.0'}
+
+toolchainopts = {'extra_cflags': "-fpermissive"}
+
+sources = [
+    {
+        'filename': '%(name)s-%(version)s.tar.gz',
+        'git_config': {
+            'url': 'https://github.com/nanoporetech',
+            'repo_name': 'pyspoa',
+            'tag': 'v%(version)s',
+            'recursive': True,
+            'keep_git_dir': True,
+        }
+    },
+    {
+        'source_urls': ['https://github.com/USCiLab/cereal/archive/'],
+        'download_filename': 'v%s.tar.gz' % local_cereal_version,
+        'filename': 'cereal-%s.tar.gz' % local_cereal_version,
+    },
+]
+
+patches = ['pyspoa-%(version)s_use-spoa-dep.patch']
+
+checksums = [
+    None,
+    '16a7ad9b31ba5880dac55d62b5d6f243c3ebc8d46a3514149e56b5e7ea81f85f',
+    'dffd946e3b36e4872846fe983d287f992b5bf177798e11141bf0d645cfc0664d',
+]
+
+builddependencies = [('CMake', '3.26.3')]
+
+dependencies = [
+    ('Python', '3.11.3'),
+    ('pybind11', '2.11.1'),
+    ('spoa', '4.1.0'),
+]
+
+download_dep_fail = True
+use_pip = True
+sanity_pip_check = True
+
+preinstallopts = "mkdir -p src/vendor/cereal && ln -s %(builddir)s/cereal-*/include src/vendor/cereal/include && "
+# strip out cmake requirements, since we provide that as proper dependency
+preinstallopts += "sed -i 's/.cmake==[0-9.]*.//g' setup.py && "
+preinstallopts += "export libspoa=$EBROOTSPOA/lib/libspoa.a && "
+
+options = {'modulename': 'spoa'}
+
+sanity_check_commands = ["cd %(builddir)s/*/tests && python test_pyspoa.py"]
+
+moduleclass = 'lib'
diff --git a/236_NGSpeciesID/python-parasail-1.3.4-foss-2023a.eb b/236_NGSpeciesID/python-parasail-1.3.4-foss-2023a.eb
new file mode 100644
index 00000000..20e6d4c7
--- /dev/null
+++ b/236_NGSpeciesID/python-parasail-1.3.4-foss-2023a.eb
@@ -0,0 +1,32 @@
+easyblock = 'PythonPackage'
+
+name = 'python-parasail'
+version = '1.3.4'
+
+homepage = 'https://github.com/jeffdaily/parasail-python'
+description = "Python Bindings for the Parasail C Library"
+
+toolchain = {'name': 'foss', 'version': '2023a'}
+
+source_urls = ['https://pypi.python.org/packages/source/%(nameletter)s/parasail']
+sources = ['parasail-%(version)s.tar.gz']
+checksums = ['d6a7035dfae3ef5aafdd7e6915711214c22b572ea059fa69d9d7ecbfb9b61b0f']
+
+builddependencies = [
+    ('parasail', '2.6.2'),
+]
+dependencies = [
+    ('Python', '3.11.3'),
+    ('SciPy-bundle', '2023.07'),
+]
+
+download_dep_fail = True
+use_pip = True
+sanity_pip_check = True
+
+# make sure setup.py finds the parasail library (link keeps the library's own file name inside parasail/)
+preinstallopts = "ln -s $EBROOTPARASAIL/lib/libparasail.%s parasail/ && " % SHLIB_EXT
+
+options = {'modulename': 'parasail'}
+
+moduleclass = 'bio'
diff --git a/236_NGSpeciesID/spoa-4.1.0-GCC-12.3.0.eb b/236_NGSpeciesID/spoa-4.1.0-GCC-12.3.0.eb
new file mode 100644
index 00000000..b85132ee
--- /dev/null
+++ b/236_NGSpeciesID/spoa-4.1.0-GCC-12.3.0.eb
@@ -0,0 +1,29 @@
+easyblock = 'CMakeMake'
+
+name = 'spoa'
+version = '4.1.0'
+
+homepage = 'https://github.com/rvaser/spoa'
+description = """Spoa (SIMD POA) is a c++ implementation of the partial order alignment (POA) algorithm
+ which is used to generate consensus sequences"""
+
+toolchain = {'name': 'GCC', 'version': '12.3.0'}
+toolchainopts = {'pic': True}
+
+source_urls = ['https://github.com/rvaser/spoa/archive/']
+sources = ['%(version)s.tar.gz']
+checksums = ['43238356f00bce2ad9698fc18b3e320024172a82182dcff24d57c5cf19e342c8']
+
+builddependencies = [('CMake', '3.26.3')]
+
+configopts = "-Dspoa_build_executable=ON"
+
+sanity_check_paths = {
+    'files': ['bin/spoa'] + ['include/spoa/%s' % x for x in ['alignment_engine.hpp', 'graph.hpp', 'spoa.hpp']] +
+             ['lib/libspoa.a'],
+    'dirs': [],
+}
+
+sanity_check_commands = ["spoa --help"]
+
+moduleclass = 'bio'