Skip to content

Commit

Permalink
Fix write method of file requires byte-like object in segment_wiki (#1750)
Browse files Browse the repository at this point in the history

* fix: file write method requires a bytes-like object, not str

sys.stdout requires str, while a file opened with the 'wb' flag requires bytes

* split 2 cases

* remove empty line; fix build
  • Loading branch information
horpto authored and menshikh-iv committed Dec 5, 2017
1 parent 10bd7fc commit 48249bb
Showing 1 changed file with 7 additions and 5 deletions.
12 changes: 7 additions & 5 deletions gensim/scripts/segment_wiki.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,9 +108,7 @@ def segment_and_write_all_articles(file_path, output_file, min_article_character
Number of parallel workers, max(1, multiprocessing.cpu_count() - 1) if None.
"""
if output_file is None:
outfile = sys.stdout
else:
if output_file is not None:
outfile = smart_open(output_file, 'wb')

try:
Expand All @@ -122,9 +120,13 @@ def segment_and_write_all_articles(file_path, output_file, min_article_character
output_data["section_texts"].append(section_content)
if (idx + 1) % 100000 == 0:
logger.info("processed #%d articles (at %r now)", idx + 1, article_title)
outfile.write(json.dumps(output_data) + "\n")
if output_file is None:
sys.stdout.write(json.dumps(output_data) + "\n")
else:
outfile.write((json.dumps(output_data) + "\n").encode())
finally:
outfile.close()
if output_file is not None:
outfile.close()


def extract_page_xmls(f):
Expand Down

0 comments on commit 48249bb

Please sign in to comment.