Merge branch 'master' into v2

eggnogdb · Oct 24, 2019 · d805097 · d805097
2 parents 5a2a59d + 41a8498
commit d805097
Show file tree

Hide file tree

Showing 5 changed files with 50 additions and 43 deletions.
diff --git a/README.md b/README.md
@@ -1,6 +1,11 @@
 # Overview
+**eggNOG-mapper** is a tool for fast functional annotation of novel sequences. It uses precomputed orthologous groups and phylogenies from the eggNOG database to transfer functional information from fine-grained orthologs only.
 
-eggnog-mapper is a tool for fast functional annotation of novel sequences (genes or proteins) using precomputed eggNOG-based orthology assignments. Obvious examples include the annotation of novel genomes, transcriptomes or even metagenomic gene catalogs. The use of orthology predictions for functional annotation is considered more precise than traditional homology searches, as it avoids transferring annotations from paralogs (duplicate genes with a higher chance of being involved in functional divergence).
+Common uses of eggNOG-mapper include the annotation of novel genomes, transcriptomes or even metagenomic gene catalogs.
+
+The use of orthology predictions for functional annotation permits higher precision than traditional homology searches (e.g. BLAST searches), as it avoids transferring annotations from close paralogs (duplicate genes with a higher chance of being involved in functional divergence).
+
+Benchmarks comparing different eggNOG-mapper options against BLAST and InterProScan [can be found here](https://github.com/jhcepas/emapper-benchmark/blob/master/benchmark_analysis.ipynb).
 
 EggNOG-mapper is also available as a public online resource: http://eggnog-mapper.embl.de
 
@@ -19,8 +24,10 @@ If you use this software, please cite:
      10.1093/molbev/msx148](https://doi.org/10.1093/molbev/msx148)
 
 [2] eggNOG 5.0: a hierarchical, functionally and phylogenetically annotated
-     orthology resource based on 5090 organisms and 2502 viruses. Jaime
-     Huerta-Cepas, Damian Szklarczyk, Davide Heller, Ana Hernández-Plaza, Sofia
-     K Forslund, Helen Cook, Daniel R Mende, Ivica Letunic, Thomas Rattei, Lars
-     J Jensen, Christian von Mering, Peer Bork Nucleic Acids Res. 2019 Jan 8;
-     47(Database issue): D309–D314. doi: 10.1093/nar/gky1085 
+      orthology resource based on 5090 organisms and 2502 viruses. Jaime
+      Huerta-Cepas, Damian Szklarczyk, Davide Heller, Ana Hernández-Plaza, Sofia
+      K Forslund, Helen Cook, Daniel R Mende, Ivica Letunic, Thomas Rattei, Lars
+      J Jensen, Christian von Mering, Peer Bork Nucleic Acids Res. 2019 Jan 8;
+      47(Database issue): D309–D314. doi: 10.1093/nar/gky1085 
+```
+
diff --git a/download_eggnog_data.py b/download_eggnog_data.py
@@ -55,8 +55,8 @@ def download_og2level():
 
 if __name__ == "__main__":
     parser = ArgumentParser()
-    parser.add_argument('dbs', metavar='dbs', nargs='+', choices=sorted(EGGNOG_DATABASES.keys()+['all', 'none']),
-                        help='list of eggNOG HMM databases to download. Choose "none" if only diamond will be used')
+    # parser.add_argument('dbs', metavar='dbs', nargs='+', choices=sorted(EGGNOG_DATABASES.keys()+['all', 'none']),
+    #                     help='list of eggNOG HMM databases to download. Choose "none" if only diamond will be used')
 
     parser.add_argument('-D', action="store_true", dest='skip_diamond',
                         help='Do not install the diamond database')
@@ -82,12 +82,12 @@ def download_og2level():
     if args.data_dir:
         set_data_path(args.data_dir)
 
-    if args.force or not pexists(pjoin(get_data_path(), 'og2level.tsv.gz')):
-        print colorify('Downloading "og2level.tsv.gz" at %s' %get_data_path(), 'green')
-        download_og2level()
+    # if args.force or not pexists(pjoin(get_data_path(), 'og2level.tsv.gz')):
+    #     print colorify('Downloading "og2level.tsv.gz" at %s' %get_data_path(), 'green')
+    #     download_og2level()
 
-    if 'all' in args.dbs:
-        args.dbs = EGGNOG_DATABASES
+    # if 'all' in args.dbs:
+    #     args.dbs = EGGNOG_DATABASES
 
     if args.force or not pexists(pjoin(get_data_path(), 'eggnog.db')):
         if args.allyes or ask("Download main annotation database?") == 'y':
@@ -100,16 +100,16 @@ def download_og2level():
         if not args.quiet:
             print colorify('Skipping eggnog.db database (already present). Use -f to force download', 'lblue')
 
-    if args.force or not pexists(pjoin(get_data_path(), 'OG_fasta')):
-        if args.allyes or ask("Download OG fasta files for annotation refinement (~20GB after decompression)?") == 'y':
-            print colorify('Downloading fasta files " at %s/OG_fasta...' %get_data_path(), 'green')
-            download_groups()
-        else:
-            print 'Skipping'
+    # if args.force or not pexists(pjoin(get_data_path(), 'OG_fasta')):
+    #     if args.allyes or ask("Download OG fasta files for annotation refinement (~20GB after decompression)?") == 'y':
+    #         print colorify('Downloading fasta files " at %s/OG_fasta...' %get_data_path(), 'green')
+    #         download_groups()
+    #     else:
+    #         print 'Skipping'
 
-    else:
-        if not args.quiet:
-            print colorify('Skipping OG_fasta/ database (already present). Use -f to force download', 'lblue')
+    # else:
+    #     if not args.quiet:
+    #         print colorify('Skipping OG_fasta/ database (already present). Use -f to force download', 'lblue')
 
     if not args.skip_diamond and (args.force or not pexists(pjoin(get_data_path(), 'eggnog_proteins.dmnd'))):
         if args.allyes or ask("Download diamond database (~4GB after decompression)?") == 'y':
@@ -121,11 +121,11 @@ def download_og2level():
         if not args.quiet:
             print colorify('Skipping diamond database (or already present). Use -f to force download', 'lblue')
 
-    if set(args.dbs) != set(['none']):
-        if args.allyes or ask("Download %d HMM database(s): %s?"%(len(args.dbs), ','.join(args.dbs))) == 'y':
-            for db in args.dbs:
-                if args.force or not get_db_present(db):
-                    print colorify('Downloading %s HMM database " at %s/%s\_hmm ...' %(db, get_hmmdb_path(), db), 'green')
-                    download_hmm_database(db)
-        else:
-            print 'Skipping'
+    # if set(args.dbs) != set(['none']):
+    #     if args.allyes or ask("Download %d HMM database(s): %s?"%(len(args.dbs), ','.join(args.dbs))) == 'y':
+    #         for db in args.dbs:
+    #             if args.force or not get_db_present(db):
+    #                 print colorify('Downloading %s HMM database " at %s/%s\_hmm ...' %(db, get_hmmdb_path(), db), 'green')
+    #                 download_hmm_database(db)
+    #     else:
+    #         print 'Skipping'
diff --git a/eggnogmapper/common.py b/eggnogmapper/common.py
@@ -18,7 +18,6 @@
 
 
 ANNOTATIONS_HEADER = map(str.strip, 'Preferred_name, GOs, EC, KEGG_ko, KEGG_Pathway, KEGG_Module, KEGG_Reaction, KEGG_rclass, BRITE, KEGG_TC, CAZy, BiGG_Reaction'.split(','))
-
 
 TIMEOUT_LOAD_SERVER = 1800
 
@@ -231,10 +230,11 @@ def get_citation(addons=['hmmer']):
 
 LICENSE = """
 LICENSE:
-[1] eggNOG-mapper is free software distributed under the GPL v2 terms.
+[1] eggNOG-mapper is free software distributed under the GPL v2 terms. 
+Built-in databases (e.g. eggNOG data) may be subject to different licensing.
 
-[2] eggNOG data are distributed under the terms of the Creative Commons Attribution
-License (http://creativecommons.org/licenses/by/4.0/), which permits
+[2] eggNOG v5.0 data are distributed under the terms of the Creative Commons Attribution-NonCommercial
+License (http://creativecommons.org/licenses/by-nc/4.0/), which permits
 unrestricted reuse, distribution, and reproduction in any medium, provided the
 original work is properly cited.
 """

diff --git a/emapper.py b/emapper.py
@@ -652,17 +652,16 @@ def _annotate_hit_line(arguments):
     swallowest_level = sorted(match_levels & set(LEVEL_DEPTH.keys()),
                               key=lambda x: LEVEL_DEPTH[x], reverse=True)[0]
 
+    annot_levels = set()
     if args.tax_scope == "auto":
         for level in TAXONOMIC_RESOLUTION:
             if level in match_levels:
-                annot_levels = set(LEVEL_CONTENT.get(level, [level]))
                 annot_levels.add(level)
-                annot_level_max = "%s[%d]" %(LEVEL_NAMES[level], len(annot_levels))
+                annot_level_max = LEVEL_NAMES.get(level, level)
                 break
     else:
-        annot_levels = set(LEVEL_CONTENT.get(args.tax_scope, [args.tax_scope]))
         annot_levels.add(args.tax_scope)
-        annot_level_max = "%s[%d]" %(args.tax_scope, len(annot_levels))
+        annot_level_max = LEVEL_NAMES.get(args.tax_scope, args.tax_scope)
 
     if args.target_taxa != 'all':
         target_taxa = orthology.normalize_target_taxa(args.target_taxa)
@@ -1049,7 +1048,7 @@ def parse_args(parser):
 
     pg_annot = parser.add_argument_group('Annotation Options')
 
-    pg_annot.add_argument("--tax_scope", type=str, choices=LEVEL_NAMES.values()+["auto"],
+    pg_annot.add_argument("--tax_scope", type=str, choices=LEVEL_NAMES.keys()+["auto"],
                     default='auto', metavar='',
                     help=("Fix the taxonomic scope used for annotation, so only orthologs from a "
                           "particular clade are used for functional transfer. "
@@ -1105,11 +1104,11 @@ def parse_args(parser):
     pg_diamond.add_argument('--gapextend', dest='gapextend', type=int, default=None, 
                     help='Gap extend  penalty')
 
-    pg_diamond.add_argument('--query-cover', dest='query_cover', type=float, default=25,
-                    help='Report only alignments above the given percentage of query cover. Default=25')
+    pg_diamond.add_argument('--query-cover', dest='query_cover', type=float, default=0,
+                    help='Report only alignments above the given percentage of query cover. Default=0')
 
-    pg_diamond.add_argument('--subject-cover', dest='subject_cover', type=float, default=25,
-                    help='Report only alignments above the given percentage of subject cover. Default=25')
+    pg_diamond.add_argument('--subject-cover', dest='subject_cover', type=float, default=0,
+                    help='Report only alignments above the given percentage of subject cover. Default=0')
 
     pg_seed = parser.add_argument_group('Seed ortholog search option')
 

diff --git a/setup.py b/setup.py
@@ -13,6 +13,7 @@
     "Operating System :: Microsoft :: Windows",
     "Operating System :: POSIX :: Linux",
     "Programming Language :: Python",
+    "Programming Language :: Python :: 2.7",
     "Topic :: Scientific/Engineering :: Bio-Informatics",
     "Topic :: Software Development :: Libraries :: Python Modules",
     ]