Skip to content

Commit

Permalink
Merge branch 'master' into v2
Browse files Browse the repository at this point in the history
  • Loading branch information
Cantalapiedra authored Oct 24, 2019
2 parents 5a2a59d + 41a8498 commit d805097
Show file tree
Hide file tree
Showing 5 changed files with 50 additions and 43 deletions.
19 changes: 13 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
# Overview
**eggNOG-mapper** is a tool for fast functional annotation of novel sequences. It uses precomputed orthologous groups and phylogenies from the eggNOG database to transfer functional information from fine-grained orthologs only.

eggnog-mapper is a tool for fast functional annotation of novel sequences (genes or proteins) using precomputed eggNOG-based orthology assignments. Obvious examples include the annotation of novel genomes, transcriptomes or even metagenomic gene catalogs. The use of orthology predictions for functional annotation is considered more precise than traditional homology searches, as it avoids transferring annotations from paralogs (duplicate genes with a higher chance of being involved in functional divergence).
Common uses of eggNOG-mapper include the annotation of novel genomes, transcriptomes or even metagenomic gene catalogs.

The use of orthology predictions for functional annotation allows higher precision than traditional homology searches (e.g. BLAST searches), as it avoids transferring annotations from close paralogs (duplicate genes with a higher chance of being involved in functional divergence).

Benchmarks comparing different eggNOG-mapper options against BLAST and InterProScan [can be found here](https://github.com/jhcepas/emapper-benchmark/blob/master/benchmark_analysis.ipynb).

eggNOG-mapper is also available as a public online resource: http://eggnog-mapper.embl.de

Expand All @@ -19,8 +24,10 @@ If you use this software, please cite:
10.1093/molbev/msx148](https://doi.org/10.1093/molbev/msx148)
[2] eggNOG 5.0: a hierarchical, functionally and phylogenetically annotated
orthology resource based on 5090 organisms and 2502 viruses. Jaime
Huerta-Cepas, Damian Szklarczyk, Davide Heller, Ana Hernández-Plaza, Sofia
K Forslund, Helen Cook, Daniel R Mende, Ivica Letunic, Thomas Rattei, Lars
J Jensen, Christian von Mering, Peer Bork. Nucleic Acids Res. 2019 Jan 8;
47(Database issue): D309–D314. doi: 10.1093/nar/gky1085
orthology resource based on 5090 organisms and 2502 viruses. Jaime
Huerta-Cepas, Damian Szklarczyk, Davide Heller, Ana Hernández-Plaza, Sofia
K Forslund, Helen Cook, Daniel R Mende, Ivica Letunic, Thomas Rattei, Lars
J Jensen, Christian von Mering, Peer Bork. Nucleic Acids Res. 2019 Jan 8;
47(Database issue): D309–D314. doi: 10.1093/nar/gky1085
```

48 changes: 24 additions & 24 deletions download_eggnog_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ def download_og2level():

if __name__ == "__main__":
parser = ArgumentParser()
parser.add_argument('dbs', metavar='dbs', nargs='+', choices=sorted(EGGNOG_DATABASES.keys()+['all', 'none']),
help='list of eggNOG HMM databases to download. Choose "none" if only diamond will be used')
# parser.add_argument('dbs', metavar='dbs', nargs='+', choices=sorted(EGGNOG_DATABASES.keys()+['all', 'none']),
# help='list of eggNOG HMM databases to download. Choose "none" if only diamond will be used')

parser.add_argument('-D', action="store_true", dest='skip_diamond',
help='Do not install the diamond database')
Expand All @@ -82,12 +82,12 @@ def download_og2level():
if args.data_dir:
set_data_path(args.data_dir)

if args.force or not pexists(pjoin(get_data_path(), 'og2level.tsv.gz')):
print colorify('Downloading "og2level.tsv.gz" at %s' %get_data_path(), 'green')
download_og2level()
# if args.force or not pexists(pjoin(get_data_path(), 'og2level.tsv.gz')):
# print colorify('Downloading "og2level.tsv.gz" at %s' %get_data_path(), 'green')
# download_og2level()

if 'all' in args.dbs:
args.dbs = EGGNOG_DATABASES
# if 'all' in args.dbs:
# args.dbs = EGGNOG_DATABASES

if args.force or not pexists(pjoin(get_data_path(), 'eggnog.db')):
if args.allyes or ask("Download main annotation database?") == 'y':
Expand All @@ -100,16 +100,16 @@ def download_og2level():
if not args.quiet:
print colorify('Skipping eggnog.db database (already present). Use -f to force download', 'lblue')

if args.force or not pexists(pjoin(get_data_path(), 'OG_fasta')):
if args.allyes or ask("Download OG fasta files for annotation refinement (~20GB after decompression)?") == 'y':
print colorify('Downloading fasta files " at %s/OG_fasta...' %get_data_path(), 'green')
download_groups()
else:
print 'Skipping'
# if args.force or not pexists(pjoin(get_data_path(), 'OG_fasta')):
# if args.allyes or ask("Download OG fasta files for annotation refinement (~20GB after decompression)?") == 'y':
# print colorify('Downloading fasta files " at %s/OG_fasta...' %get_data_path(), 'green')
# download_groups()
# else:
# print 'Skipping'

else:
if not args.quiet:
print colorify('Skipping OG_fasta/ database (already present). Use -f to force download', 'lblue')
# else:
# if not args.quiet:
# print colorify('Skipping OG_fasta/ database (already present). Use -f to force download', 'lblue')

if not args.skip_diamond and (args.force or not pexists(pjoin(get_data_path(), 'eggnog_proteins.dmnd'))):
if args.allyes or ask("Download diamond database (~4GB after decompression)?") == 'y':
Expand All @@ -121,11 +121,11 @@ def download_og2level():
if not args.quiet:
print colorify('Skipping diamond database (or already present). Use -f to force download', 'lblue')

if set(args.dbs) != set(['none']):
if args.allyes or ask("Download %d HMM database(s): %s?"%(len(args.dbs), ','.join(args.dbs))) == 'y':
for db in args.dbs:
if args.force or not get_db_present(db):
print colorify('Downloading %s HMM database " at %s/%s\_hmm ...' %(db, get_hmmdb_path(), db), 'green')
download_hmm_database(db)
else:
print 'Skipping'
# if set(args.dbs) != set(['none']):
# if args.allyes or ask("Download %d HMM database(s): %s?"%(len(args.dbs), ','.join(args.dbs))) == 'y':
# for db in args.dbs:
# if args.force or not get_db_present(db):
# print colorify('Downloading %s HMM database " at %s/%s\_hmm ...' %(db, get_hmmdb_path(), db), 'green')
# download_hmm_database(db)
# else:
# print 'Skipping'
8 changes: 4 additions & 4 deletions eggnogmapper/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@


ANNOTATIONS_HEADER = map(str.strip, 'Preferred_name, GOs, EC, KEGG_ko, KEGG_Pathway, KEGG_Module, KEGG_Reaction, KEGG_rclass, BRITE, KEGG_TC, CAZy, BiGG_Reaction'.split(','))


TIMEOUT_LOAD_SERVER = 1800

Expand Down Expand Up @@ -231,10 +230,11 @@ def get_citation(addons=['hmmer']):

LICENSE = """
LICENSE:
[1] eggNOG-mapper is free software distributed under the GPL v2 terms.
[1] eggNOG-mapper is free software distributed under the GPL v2 terms.
Built-in databases (e.g. eggNOG data) might be subjected to different licensing.
[2] eggNOG data are distributed under the terms of the Creative Commons Attribution
License (http://creativecommons.org/licenses/by/4.0/), which permits
[2] eggNOG v5.0 data are distributed under the terms of the Creative Commons Non-Commercial Attribution
License (http://creativecommons.org/licenses/by-nc/4.0/), which permits
unrestricted reuse, distribution, and reproduction in any medium, provided the
original work is properly cited.
"""
Expand Down
17 changes: 8 additions & 9 deletions emapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -652,17 +652,16 @@ def _annotate_hit_line(arguments):
swallowest_level = sorted(match_levels & set(LEVEL_DEPTH.keys()),
key=lambda x: LEVEL_DEPTH[x], reverse=True)[0]

annot_levels = set()
if args.tax_scope == "auto":
for level in TAXONOMIC_RESOLUTION:
if level in match_levels:
annot_levels = set(LEVEL_CONTENT.get(level, [level]))
annot_levels.add(level)
annot_level_max = "%s[%d]" %(LEVEL_NAMES[level], len(annot_levels))
annot_level_max = LEVEL_NAMES.get(level, level)
break
else:
annot_levels = set(LEVEL_CONTENT.get(args.tax_scope, [args.tax_scope]))
annot_levels.add(args.tax_scope)
annot_level_max = "%s[%d]" %(args.tax_scope, len(annot_levels))
annot_level_max = LEVEL_NAMES.get(args.tax_scope, args.tax_scope)

if args.target_taxa != 'all':
target_taxa = orthology.normalize_target_taxa(args.target_taxa)
Expand Down Expand Up @@ -1049,7 +1048,7 @@ def parse_args(parser):

pg_annot = parser.add_argument_group('Annotation Options')

pg_annot.add_argument("--tax_scope", type=str, choices=LEVEL_NAMES.values()+["auto"],
pg_annot.add_argument("--tax_scope", type=str, choices=LEVEL_NAMES.keys()+["auto"],
default='auto', metavar='',
help=("Fix the taxonomic scope used for annotation, so only orthologs from a "
"particular clade are used for functional transfer. "
Expand Down Expand Up @@ -1105,11 +1104,11 @@ def parse_args(parser):
pg_diamond.add_argument('--gapextend', dest='gapextend', type=int, default=None,
help='Gap extend penalty')

pg_diamond.add_argument('--query-cover', dest='query_cover', type=float, default=25,
help='Report only alignments above the given percentage of query cover. Default=25')
pg_diamond.add_argument('--query-cover', dest='query_cover', type=float, default=0,
help='Report only alignments above the given percentage of query cover. Default=0')

pg_diamond.add_argument('--subject-cover', dest='subject_cover', type=float, default=25,
help='Report only alignments above the given percentage of subject cover. Default=25')
pg_diamond.add_argument('--subject-cover', dest='subject_cover', type=float, default=0,
help='Report only alignments above the given percentage of subject cover. Default=0')

pg_seed = parser.add_argument_group('Seed ortholog search option')

Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
"Operating System :: Microsoft :: Windows",
"Operating System :: POSIX :: Linux",
"Programming Language :: Python",
"Programming Language :: Python :: 2.7",
"Topic :: Scientific/Engineering :: Bio-Informatics",
"Topic :: Software Development :: Libraries :: Python Modules",
]
Expand Down

0 comments on commit d805097

Please sign in to comment.