Skip to content

Commit

Permalink
Add name_only option for downloader api (#2143)
Browse files Browse the repository at this point in the history
* handle deprecation

* handle max_count

* change flag name

* make flake8 compatible

* move max_vocab to prepare vocab

* correct max_vocab semantics

* remove unnecessary nextline

* fix bug and make flake8 compliant

* refactor code and change sorting to key based

* add tests

* introduce effective_min_count

* make flake8 compliant

* remove clobbering of min_count

* remove min_count assertion

* .\gensim\models\word2vec.py

* Revert ".\gensim\models\word2vec.py"

This reverts commit 6c06fbc.

* rename max_vocab to max_final_vocab

* update test to max_final_vocab

* move and modify comment docs

* make flake8 compliant

* refactor word2vec.py

* handle possible old model load errors

* include effective_min_count tests

* make flake8 compliant

* remove check for max_final_vocab

* include backward compat for 3.3 models

* remove unnecessary newline

* add test case for max_final_vocab

* add name only option to downloader api

* add tests

* make single argument option for name_only

* make name_only into name
  • Loading branch information
aneesh-joshi authored and menshikh-iv committed Aug 3, 2018
1 parent 61728a0 commit 4520adf
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 3 deletions.
15 changes: 12 additions & 3 deletions gensim/downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
Also, this API is available via CLI::
python -m gensim.downloader --info <dataname> # same as api.info(dataname)
python -m gensim.downloader --info name # same as api.info(name_only=True)
python -m gensim.downloader --download <dataname> # same as api.load(dataname, return_path=True)
"""
Expand Down Expand Up @@ -154,7 +155,7 @@ def _calculate_md5_checksum(fname):
return hash_md5.hexdigest()


def info(name=None, show_only_latest=True):
def info(name=None, show_only_latest=True, name_only=False):
"""Provide the information related to model/dataset.
Parameters
Expand All @@ -164,6 +165,8 @@ def info(name=None, show_only_latest=True):
show_only_latest : bool, optional
If storage contains different versions for one data/model, this flag allows hiding outdated versions.
Takes effect only if `name` is None.
name_only : bool, optional
If True, will return only the names of available models and corpora.
Returns
-------
Expand Down Expand Up @@ -205,6 +208,9 @@ def info(name=None, show_only_latest=True):
if not show_only_latest:
return information

if name_only:
return {"corpora": list(information['corpora'].keys()), "models": list(information['models'])}

return {
"corpora": {name: data for (name, data) in information['corpora'].items() if data.get("latest", True)},
"models": {name: data for (name, data) in information['models'].items() if data.get("latest", True)}
Expand Down Expand Up @@ -444,5 +450,8 @@ def load(name, return_path=False):
data_path = load(args.download[0], return_path=True)
logger.info("Data has been installed and data path is %s", data_path)
elif args.info is not None:
output = info() if (args.info == full_information) else info(name=args.info)
print(json.dumps(output, indent=4))
if args.info == 'name':
print(json.dumps(info(name_only=True), indent=4))
else:
output = info() if (args.info == full_information) else info(name=args.info)
print(json.dumps(output, indent=4))
3 changes: 3 additions & 0 deletions gensim/test/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@ def test_info(self):
self.assertEqual(sorted(data.keys()), sorted(['models', 'corpora']))
self.assertTrue(len(data['models']))
self.assertTrue(len(data['corpora']))
name_only_data = api.info(name_only=True)
self.assertEqual(len(name_only_data.keys()), 2)
self.assertTrue({'models', 'corpora'} == set(name_only_data))


if __name__ == '__main__':
Expand Down

0 comments on commit 4520adf

Please sign in to comment.