Skip to content

Commit

Permalink
Fixed #157, new tests (#137) and Singularity recipes (#149), h/t to @…
Browse files Browse the repository at this point in the history
…cschuh for the new recipe
  • Loading branch information
lucventurini committed Mar 21, 2019
1 parent 0c2cc18 commit f91b3d5
Show file tree
Hide file tree
Showing 9 changed files with 236 additions and 84 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
*.pyc
*.pyd
*.simg
.#*
#*
*.coverage*
Expand Down
36 changes: 23 additions & 13 deletions Mikado/parsers/GFF.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,17 @@
from . import Parser
from .gfannotation import GFAnnotation
from sys import intern
import re


# This class has exactly how many attributes I need it to have
# pylint: disable=too-many-instance-attributes
class GffLine(GFAnnotation):
"""Object which serializes a GFF line."""

# The (?:;|$) means "match, but **do not capture**, either a semicolon or the end of the line".
_attribute_pattern = re.compile(r"([^;]*)=([^$=]*)(?:;|$)")

def __init__(self, line, my_line='', header=False):
"""
Constructor method.
Expand All @@ -41,19 +45,25 @@ def _parse_attributes(self):

self.attribute_order = []

for item in iter(x for x in self._attr.rstrip().split(';') if x != ''):
itemized = item.strip().split('=')
try:
if itemized[0].lower() == "parent":
self.parent = itemized[1].split(",")

elif itemized[0].upper() == "ID":
self.id = itemized[1]
else:
self.attributes[itemized[0]] = itemized[1]
self.attribute_order.append(itemized[0])
except IndexError:
pass
infolist = re.findall(self._attribute_pattern, self._attr.rstrip().rstrip(";"))

for item in infolist:
key, val = item
if key.lower() == "parent":
self.parent = val.split(",")
elif key.upper() == "ID":
self.id = val
else:
try:
val = int(val)
except ValueError:
try:
val = float(val)
except ValueError:
pass
finally:
self.attributes[key] = val
self.attribute_order.append(key)

def _format_attributes(self):
"""
Expand Down
58 changes: 25 additions & 33 deletions Mikado/parsers/GTF.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from . import Parser
from .gfannotation import GFAnnotation
import re


# This class has exactly how many attributes I need it to have
Expand Down Expand Up @@ -35,6 +36,8 @@ class GtfLine(GFAnnotation):
# _slots=['chrom','source','feature','start',\
# 'end','score','strand','phase','info']

_attribute_pattern = re.compile(r"([^;\s]*) \"([^\"]*)\"(?:;|$)")

def __init__(self, line, my_line='', header=False):

self.__frame = None
Expand All @@ -49,40 +52,29 @@ def _parse_attributes(self):
:return:
"""

# info_list = []
for info in iter(x for x in self._attr.rstrip().split(';') if x != ''):
info = info.strip().split(' ')
# info_list.append(info)
# info = info.lstrip().split(' ')
# for info in iter(x for x in self._attr.rstrip().split(';') if x != ''):
# info = info.strip().split(' ')
# # info_list.append(info)
# # info = info.lstrip().split(' ')
# try:
# self.attributes[info[0]] = info[1].replace('"', '')
# except IndexError as exc:
# # something wrong has happened, let us just skip
# import sys
# print("Wrong attributes ({}) in line:\n{}".format(info, "\t".join(self._fields)), file=sys.stderr)
# if info[0] == "exon_number":
# self.attributes['exon_number'] = int(self.attributes['exon_number'])

infodict = dict(re.findall(self._attribute_pattern, self._attr.rstrip()))
for key, val in infodict.items():
try:
self.attributes[info[0]] = info[1].replace('"', '')
except IndexError as exc:
# something wrong has happened, let us just skip
import sys
print("Wrong attributes ({}) in line:\n{}".format(info, "\t".join(self._fields)), file=sys.stderr)
if info[0] == "exon_number":
self.attributes['exon_number'] = int(self.attributes['exon_number'])
# elif info[0] in ("nearest_ref", "tss_id"):
# setattr(self, info[0], info[1])

# try:
#
# except IndexError:
# raise IndexError(info_list, info)

# if 'exon_number' in self.attributes:
# self.attributes['exon_number'] = int(self.attributes['exon_number'])
assert 'gene_id', 'transcript_id' in self.attributes

# if 'nearest_ref' in self.attributes:
# self.nearest_ref = self.attributes['nearest_ref']
# if 'tss_id' in self.attributes:
# self.tss_id = self.attributes['tss_id']

# for tag in iter(att for att in self.attributes if
# att not in ('gene_id', 'transcript_id', 'nearest_ref',
# 'tss_id', 'class_code')):
# self.__dict__[tag.lower()] = self.attributes[tag]
val = int(val)
except ValueError:
try:
val = float(val)
except ValueError:
val = val.replace('"', '')
self.attributes[key] = val

def _format_attributes(self):

Expand Down
22 changes: 22 additions & 0 deletions Mikado/tests/parser_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,28 @@ def test_length(self):
gtf_line.header, gtf_line.start, gtf_line.end = True, None, gtf_line.end
self.assertEqual(len(gff_line), 0)

def test_pesky_gtf_line(self):
    """Check that a GTF attribute whose quoted value contains semicolons is kept whole.

    The "SA" attribute of the line below holds two semicolon-terminated
    records inside a single pair of double quotes; the parsed value must be
    the full quoted content, not a fragment of it.
    """

    # Expected content of the "SA" attribute, semicolons included.
    expected_sa = (
        "GmG20150304_scaffold_7394,26960,-,673S2322M750D143S,60,68;"
        "GmG20150304_scaffold_5658,14965,+,50S192M723D2896S,37,22;"
    )

    # NOTE(review): the final "cigar" value is closed by a typographic quote
    # (”) rather than an ASCII one - presumably unintentional; confirm.
    raw = """LG01\tbam2gtf\ttranscript\t44857\t46213\t60\t+\t.\tgene_id "transcript/12468.gene"; transcript_id "transcript/12468"; NM "92"; ms "842"; AS "806"; nn "0"; tp "P"; cm "145"; s1 "625"; s2 "372"; de "0.07109999656677246"; SA "GmG20150304_scaffold_7394,26960,-,673S2322M750D143S,60,68;GmG20150304_scaffold_5658,14965,+,50S192M723D2896S,37,22;"; coverage "100.0"; cigar "8M1I86M9I37M1I213M4I553M266N3M1I41M1D56M4I93M2028H”;"""
    record = parsers.GTF.GtfLine(raw)

    self.assertEqual(record.feature, "transcript")
    # The parsed attributes are passed as the failure message to ease debugging.
    self.assertIn("SA", record.attributes.keys(), record.attributes)
    self.assertEqual(record.attributes["SA"], expected_sa, record.attributes)

def test_pesky_gff_line(self):
    """Check that a GFF attribute whose value contains semicolons is kept whole.

    The "SA" attribute of the line below holds two semicolon-terminated
    records; the parsed value must contain both of them.
    """

    # Expected content of the "SA" attribute, semicolons included.
    expected_sa = (
        "GmG20150304_scaffold_7394,26960,-,673S2322M750D143S,60,68;"
        "GmG20150304_scaffold_5658,14965,+,50S192M723D2896S,37,22;"
    )

    raw = """LG01\tbam2gtf\ttranscript\t44857\t46213\t60\t+\t.\tID=transcript/12468;Parent="transcript/12468.gene";NM=92;ms=842;AS=806;nn=0;tp=P;cm=145;s1=625;s2=372;de=0.07109999656677246;SA=GmG20150304_scaffold_7394,26960,-,673S2322M750D143S,60,68;GmG20150304_scaffold_5658,14965,+,50S192M723D2896S,37,22;;coverage=100.0;cigar=8M1I86M9I37M1I213M4I553M266N3M1I41M1D56M4I93M2028H;"""
    record = parsers.GFF.GffLine(raw)

    self.assertEqual(record.feature, "transcript")
    self.assertEqual(record.attributes["SA"], expected_sa)


if __name__ == '__main__':
unittest.main()
11 changes: 6 additions & 5 deletions Mikado/tests/test_system_calls.py
Original file line number Diff line number Diff line change
Expand Up @@ -1270,8 +1270,8 @@ def __get_purgeable_gff(self):
Chr1 foo exon 100 800 . + . gene_id "foo1"; transcript_id "foo1.2"
Chr1 foo exon 1900 2000 . + . gene_id "foo1"; transcript_id "foo1.2"
Chr1 foo transcript 10000 20000 . + . gene_id "foo2"; transcript_id "foo2.1"
Chr1 foo exon 10000 13000 . + . gene_id "foo2; transcript_id "foo2.1"
Chr1 foo exon 19000 20000 . + . gene_id "foo"; transcript_id "foo2.1"""
Chr1 foo exon 10000 13000 . + . gene_id "foo2"; transcript_id "foo2.1"
Chr1 foo exon 19000 20000 . + . gene_id "foo"; transcript_id "foo2.1\""""

dir = tempfile.TemporaryDirectory()
temp_gtf = tempfile.NamedTemporaryFile(mode="wt", suffix=".gtf", dir=dir.name, delete=True)
Expand Down Expand Up @@ -1336,10 +1336,10 @@ def test_purging1(self):

with to_gff(os.path.join(dir.name,
self.json_conf["pick"]["files"]["loci_out"])) as gff:

lines = [line for line in gff if line.header is False]
self.assertGreater(len(lines), 0)
self.assertTrue(any([_ for _ in lines if _.attributes.get("alias", "") == "foo2.1"]))
self.assertTrue(any([_ for _ in lines if _.attributes.get("alias", "") == "foo2.1"]),
"\n".join([str(_) for _ in lines]))
if purging is True:
self.assertFalse(any([_ for _ in lines if _.attributes.get("alias", "") in ("foo1.2", "foo1.1")]))
else:
Expand Down Expand Up @@ -1460,7 +1460,8 @@ def test_purging3(self):
self.json_conf["pick"]["files"]["loci_out"])) as gff:
lines = [line for line in gff if line.header is False]
self.assertGreater(len(lines), 0)
self.assertTrue(any([_ for _ in lines if _.attributes.get("alias", "") == "foo2.1"]))
self.assertTrue(any([_ for _ in lines if _.attributes.get("alias", "") == "foo2.1"]),
"\n".join([str(_) for _ in lines]))
if purging is True:
self.assertFalse(any([_ for _ in lines if _.attributes.get("alias", "") == "foo1.2"]))
else:
Expand Down
3 changes: 1 addition & 2 deletions Mikado/transcripts/transcript.py
Original file line number Diff line number Diff line change
Expand Up @@ -489,15 +489,14 @@ def __initialize_with_gf(self, transcript_row: (GffLine, GtfLine)):
booleans = {"True": True, "False": False, "None": None}

for key, val in transcript_row.attributes.items():

if not isinstance(val, Hashable):
pass
elif val in booleans:
val = booleans[val]
else:
try:
val = int(val)
except ValueError:
except (ValueError, OverflowError):
try:
val = float(val)
except ValueError:
Expand Down
30 changes: 20 additions & 10 deletions Singularity.centos.def
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,11 @@ Include: yum wget
mikado --help

%environment
export MIKADO_COMMIT_HASH=$(cd /usr/local/src/mikado && git log | head -n 1 | cut -f 2 -d " ")
export PATH="/usr/local/bin:$PATH:/usr/local/conda/bin/"
# source /usr/local/conda/bin/activate


%post

### Install your packages ###
Expand Down Expand Up @@ -52,26 +54,34 @@ Include: yum wget
# Install python requirements
git clone https://github.com/EI-CoreBioinformatics/mikado.git
cd mikado
git log | head -n 1 | cut -f 2 -d " " > ~/MIKADO_COMMIT_HASH
sed -i 's/;.*//' requirements.txt
conda install --update-all -y -c conda-forge -c bioconda -c anaconda --file requirements.txt
python setup.py bdist_wheel
pip install dist/*whl

echo '#!/bin/bash' >> /usr/local/bin/show_commit_hash
echo 'cd /usr/local/src/mikado' >> /usr/local/bin/show_commit_hash
echo 'git log | head -n1 | cut -f 2 -d " "' >> /usr/local/bin/show_commit_hash
chmod 775 /usr/local/bin/show_commit_hash

# Various dependencies for Daijin
conda install -y -c bioconda -c anaconda -c conda-forge samtools==1.9 openssl=1.0 prodigal blast diamond==0.9.24 transdecoder==5.5.0 stringtie==1.3.4 cufflinks==2.2.1 hisat2==2.1.0 gmap==2018.07.04 portcullis trinity star==2.7.0b minimap2==2.15
cd /opt/software
wget https://github.com/Kingsford-Group/scallop/releases/download/v0.10.3/scallop-0.10.3_linux_x86_64.tar.gz && tar xaf scallop-0.10.3_linux_x86_64.tar.gz
mv scallop-0.10.3_linux_x86_64/scallop /usr/local/conda/bin/ && rm -rf scallop-0.10.3_linux_x86_64 scallop-0.10.3_linux_x86_64.tar.gz
yum install -y unzip
wget https://github.com/mourisl/CLASS/archive/v2.1.7.zip && unzip v2.1.7.zip && rm v2.1.7.zip
cd /opt/software/CLASS-2.1.7/ && sh build.sh && mv -t /usr/local/conda/bin/ class junc clnb grader addXS
cd /opt/software/
rm -rf /opt/software/CLASS-2.1.7/
cd /mnt/
conda install -y -c bioconda -c anaconda -c conda-forge samtools==1.9 openssl=1.0 prodigal blast diamond==0.9.24 transdecoder==5.5.0

%apprun snakemake
snakemake "@"

%apprun mikado
mikado "@"

%apprun daijin
daijin "@"

%apprun prodigal
prodigal "@"

%apprun samtools
samtools "@"

%apprun diamond
diamond "@"
49 changes: 28 additions & 21 deletions Singularity.ubuntu.def
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@ Include: apt wget
mikado --help

%environment
export MIKADO_COMMIT_HASH=$(cd /usr/local/src/mikado && git log | head -n 1 | cut -f 2 -d " ")
export PATH="/usr/local/bin:$PATH:/usr/local/conda/bin/"
# . /usr/local/conda/bin/activate
# source /usr/local/conda/bin/activate
# conda activate python36


%post

Expand All @@ -30,47 +30,54 @@ Include: apt wget
gcc --version
make --version

# Clean up yum
# Clean up apt
apt clean

cd /usr/local/src
wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh
bash Miniconda3-latest-Linux-x86_64.sh -b -p /usr/local/conda
export PATH="/usr/local/conda/bin:$PATH"
. /usr/local/conda/bin/activate
# sudo bash -c "/usr/local/conda/bin/conda init bash"
conda update -n base -c defaults conda
conda update -y -n base -c defaults conda
ln -s /usr/local/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh
conda install -y -c conda-forge pip python==3.6.7

python3 --version
cd $(dirname $(which python3))
cd /opt/software/

# Install python requirements
git clone https://github.com/EI-CoreBioinformatics/mikado.git
cd mikado
git log | head -n 1 | cut -f 2 -d " " > MIKADO_COMMIT_HASH
sed -i 's/;.*//' requirements.txt
conda install --update-all -y -c conda-forge -c bioconda -c anaconda --file requirements.txt
python setup.py bdist_wheel
pip install dist/*whl
# mikado --help

echo '#!/bin/bash' >> /usr/local/bin/show_commit_hash
echo 'cd /usr/local/src/mikado' >> /usr/local/bin/show_commit_hash
echo 'git log | head -n1 | cut -f 2 -d " "' >> /usr/local/bin/show_commit_hash
chmod 775 /usr/local/bin/show_commit_hash

# Various dependencies for Daijin
conda install -y -c bioconda -c anaconda -c conda-forge samtools==1.9 openssl=1.0 prodigal blast diamond==0.9.24 transdecoder==5.5.0 stringtie==1.3.4 cufflinks==2.2.1 hisat2==2.1.0 gmap==2018.07.04 portcullis trinity star==2.7.0b minimap2==2.15
cd /opt/software
wget https://github.com/Kingsford-Group/scallop/releases/download/v0.10.3/scallop-0.10.3_linux_x86_64.tar.gz && tar xaf scallop-0.10.3_linux_x86_64.tar.gz
mv scallop-0.10.3_linux_x86_64/scallop /usr/local/conda/bin/ && rm -rf scallop-0.10.3_linux_x86_64 scallop-0.10.3_linux_x86_64.tar.gz
apt install -y unzip
wget https://github.com/mourisl/CLASS/archive/v2.1.7.zip && unzip v2.1.7.zip && rm v2.1.7.zip
cd /opt/software/CLASS-2.1.7/ && sh build.sh && mv -t /usr/local/conda/bin/ class junc clnb grader addXS
cd /opt/software/
rm -rf /opt/software/CLASS-2.1.7/
cd /mnt/
conda install -y -c bioconda -c anaconda -c conda-forge samtools==1.9 openssl=1.0 prodigal blast diamond==0.9.24 transdecoder==5.5.0

%apprun snakemake
snakemake "@"

%apprun mikado
mikado "@"

%apprun daijin
daijin "@"

%apprun snakemake
snakemake "@"

%apprun mikado
mikado "@"

%apprun prodigal
prodigal "@"

%apprun samtools
samtools "@"

%apprun diamond
diamond "@"
Loading

0 comments on commit f91b3d5

Please sign in to comment.