Skip to content

Commit

Permalink
Added logging to dictionary_from_corpus function
Browse files Browse the repository at this point in the history
  • Loading branch information
buma committed Sep 6, 2012
1 parent 8c2f42c commit af9c594
Showing 1 changed file with 5 additions and 1 deletion.
6 changes: 5 additions & 1 deletion gensim/corpora/dictionary.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,9 @@ def from_corpus(corpus):
Useful only if you do not have a text corpus.
"""
result = Dictionary()
for document in corpus:
for docno, document in enumerate(corpus):
if docno % 10000 == 0:
logger.info("adding document #%i to %s" % (docno, result))
result.num_docs += 1
result.num_nnz += len(document)
for wordid, word in document:
Expand All @@ -269,6 +271,8 @@ def from_corpus(corpus):
result.token2id[str_wordid] = wordid
else:
result.dfs[wordid] += 1
logger.info("built %s from %i documents (total %i corpus positions)" %
(result, result.num_docs, result.num_pos))
return result
#endclass Dictionary

0 comments on commit af9c594

Please sign in to comment.