From 742110b5f927dba8695198e79a9f9d00aab5120b Mon Sep 17 00:00:00 2001 From: bhargavvader Date: Thu, 30 Jun 2016 20:23:10 +0530 Subject: [PATCH 1/3] Unicode --- gensim/models/wrappers/dtmmodel.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gensim/models/wrappers/dtmmodel.py b/gensim/models/wrappers/dtmmodel.py index a55a605391..c407162c89 100644 --- a/gensim/models/wrappers/dtmmodel.py +++ b/gensim/models/wrappers/dtmmodel.py @@ -173,9 +173,9 @@ def convert_input(self, corpus, time_slices): corpora.BleiCorpus.save_corpus(self.fcorpustxt(), corpus) with utils.smart_open(self.ftimeslices(), 'wb') as fout: - fout.write(six.u(str(len(self.time_slices)) + "\n")) + fout.write(six.u(str(len(self.time_slices)) + "\n").encode('utf-8')) for sl in time_slices: - fout.write(six.u(str(sl) + "\n")) + fout.write(six.u(str(sl) + "\n").encode('utf-8')) def train(self, corpus, time_slices, mode, model): """ From 6f460e1e5aadcfdd43492174d3df1cbf4cd8b848 Mon Sep 17 00:00:00 2001 From: bhargavvader Date: Thu, 30 Jun 2016 20:32:13 +0530 Subject: [PATCH 2/3] Used utils.to_utf8 --- gensim/models/wrappers/dtmmodel.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/gensim/models/wrappers/dtmmodel.py b/gensim/models/wrappers/dtmmodel.py index c407162c89..c6cbf4a980 100644 --- a/gensim/models/wrappers/dtmmodel.py +++ b/gensim/models/wrappers/dtmmodel.py @@ -171,11 +171,10 @@ def convert_input(self, corpus, time_slices): logger.info("serializing temporary corpus to %s" % self.fcorpustxt()) # write out the corpus in a file format that DTM understands: corpora.BleiCorpus.save_corpus(self.fcorpustxt(), corpus) - with utils.smart_open(self.ftimeslices(), 'wb') as fout: - fout.write(six.u(str(len(self.time_slices)) + "\n").encode('utf-8')) + fout.write(six.u(utils.to_utf8(str(len(self.time_slices)) + "\n"))) for sl in time_slices: - fout.write(six.u(str(sl) + "\n").encode('utf-8')) + fout.write(six.u(utils.to_utf8(str(sl) + "\n"))) def train(self, corpus, time_slices, mode, model): """ From 6d54fbbf1c048b14ae603916488472895f1b904d Mon Sep 17 00:00:00 2001 From: bhargavvader Date: Thu, 30 Jun 2016 20:36:58 +0530 Subject: [PATCH 3/3] PEP8 line --- gensim/models/wrappers/dtmmodel.py | 1 + 1 file changed, 1 insertion(+) diff --git a/gensim/models/wrappers/dtmmodel.py b/gensim/models/wrappers/dtmmodel.py index c6cbf4a980..35fe3f4df7 100644 --- a/gensim/models/wrappers/dtmmodel.py +++ b/gensim/models/wrappers/dtmmodel.py @@ -171,6 +171,7 @@ def convert_input(self, corpus, time_slices): logger.info("serializing temporary corpus to %s" % self.fcorpustxt()) # write out the corpus in a file format that DTM understands: corpora.BleiCorpus.save_corpus(self.fcorpustxt(), corpus) + with utils.smart_open(self.ftimeslices(), 'wb') as fout: fout.write(six.u(utils.to_utf8(str(len(self.time_slices)) + "\n"))) for sl in time_slices: