From 6b4c255632346c9f48ba4975e2b8bb96340735c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luciano=20Santa=20Br=C3=ADgida?= Date: Tue, 31 Jan 2017 15:21:52 -0200 Subject: [PATCH] word2vec2tensor with python 3 As discussed with @tmylk in issue 1126, under Python 3 word.encode('utf-8') returns bytes, which cannot be concatenated with the str '\n' (it raises TypeError). Changing to gensim.utils.to_unicode(word) worked. Tested with Python 3.5 on Ubuntu 16.04. --- gensim/scripts/word2vec2tensor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gensim/scripts/word2vec2tensor.py b/gensim/scripts/word2vec2tensor.py index d2184b15c0..3684a98e88 100644 --- a/gensim/scripts/word2vec2tensor.py +++ b/gensim/scripts/word2vec2tensor.py @@ -51,7 +51,7 @@ def word2vec2tensor(word2vec_model_path,tensor_filename, binary=False): with open(outfiletsv, 'w+') as file_vector: with open(outfiletsvmeta, 'w+') as file_metadata: for word in model.index2word: - file_metadata.write(word.encode('utf-8') + '\n') + file_metadata.write(gensim.utils.to_unicode(word) + '\n') vector_row = '\t'.join(map(str, model[word])) file_vector.write(vector_row + '\n')