Skip to content

Commit

Permalink
Improve six usage (xrange, map, zip) (#2264)
Browse files Browse the repository at this point in the history
* replace xrange with range + import from six.moves

The code currently contains both range and xrange, which is inconsistent.
Since Python 2 support will end soon, it is better to make the code look
more like Python 3.

* fix build
  • Loading branch information
horpto authored and menshikh-iv committed Dec 17, 2018
1 parent b8219ac commit 3d5a21c
Show file tree
Hide file tree
Showing 34 changed files with 137 additions and 138 deletions.
2 changes: 1 addition & 1 deletion docs/notebooks/Wordrank_comparisons.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1071,7 +1071,7 @@
" # sort analogies according to their mean frequences \n",
" copy_mean_freq = sorted(copy_mean_freq.items(), key=lambda x: x[1][1])\n",
" # prepare analogies buckets according to given size\n",
" for centre_p in xrange(bucket_size//2, len(copy_mean_freq), bucket_size):\n",
" for centre_p in range(bucket_size//2, len(copy_mean_freq), bucket_size):\n",
" bucket = copy_mean_freq[centre_p-bucket_size//2:centre_p+bucket_size//2]\n",
" b_acc = 0\n",
" # calculate current bucket accuracy with b_acc count\n",
Expand Down
6 changes: 3 additions & 3 deletions docs/notebooks/translation_matrix.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -417,7 +417,7 @@
"duration = []\n",
"sizeofword = []\n",
"\n",
"for idx in xrange(0, test_case):\n",
"for idx in range(0, test_case):\n",
" sub_pair = word_pair[: (idx + 1) * step]\n",
"\n",
" startTime = time.time()\n",
Expand Down Expand Up @@ -1450,15 +1450,15 @@
"small_train_docs = train_docs[:15000]\n",
"# train for small corpus\n",
"model1.build_vocab(small_train_docs)\n",
"for epoch in xrange(50):\n",
"for epoch in range(50):\n",
" shuffle(small_train_docs)\n",
" model1.train(small_train_docs, total_examples=len(small_train_docs), epochs=1)\n",
"model.save(\"small_doc_15000_iter50.bin\")\n",
"\n",
"large_train_docs = train_docs + test_docs\n",
"# train for large corpus\n",
"model2.build_vocab(large_train_docs)\n",
"for epoch in xrange(50):\n",
"for epoch in range(50):\n",
" shuffle(large_train_docs)\n",
" model2.train(large_train_docs, total_examples=len(train_docs), epochs=1)\n",
"# save the model\n",
Expand Down
6 changes: 3 additions & 3 deletions gensim/corpora/_mmreader.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ from __future__ import with_statement
from gensim import utils

from six import string_types
from six.moves import xrange
from six.moves import range
import logging

cimport cython
Expand Down Expand Up @@ -148,7 +148,7 @@ cdef class MmReader(object):

# return implicit (empty) documents between previous id and new id
# too, to keep consistent document numbering and corpus length
for previd in xrange(previd + 1, docid):
for previd in range(previd + 1, docid):
yield previd, []

# from now on start adding fields to a new document, with a new id
Expand All @@ -163,7 +163,7 @@ cdef class MmReader(object):

# return empty documents between the last explicit document and the number
# of documents as specified in the header
for previd in xrange(previd + 1, self.num_docs):
for previd in range(previd + 1, self.num_docs):
yield previd, []

def docbyoffset(self, offset):
Expand Down
4 changes: 2 additions & 2 deletions gensim/corpora/bleicorpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

from gensim import utils
from gensim.corpora import IndexedCorpus
from six.moves import xrange
from six.moves import range


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -161,7 +161,7 @@ def save_corpus(fname, corpus, id2word=None, metadata=False):
fname_vocab = utils.smart_extension(fname, '.vocab')
logger.info("saving vocabulary of %i words to %s", num_terms, fname_vocab)
with utils.smart_open(fname_vocab, 'wb') as fout:
for featureid in xrange(num_terms):
for featureid in range(num_terms):
fout.write(utils.to_utf8("%s\n" % id2word.get(featureid, '---')))

return offsets
Expand Down
7 changes: 3 additions & 4 deletions gensim/corpora/dictionary.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@
from gensim import utils

from six import PY3, iteritems, iterkeys, itervalues, string_types
from six.moves import xrange
from six.moves import zip as izip
from six.moves import zip, range

if sys.version_info[0] >= 3:
unicode = str
Expand Down Expand Up @@ -466,7 +465,7 @@ def compactify(self):
logger.debug("rebuilding dictionary, shrinking gaps")

# build mapping from old id -> new id
idmap = dict(izip(sorted(itervalues(self.token2id)), xrange(len(self.token2id))))
idmap = dict(zip(sorted(itervalues(self.token2id)), range(len(self.token2id))))

# reassign mappings to new ids
self.token2id = {token: idmap[tokenid] for token, tokenid in iteritems(self.token2id)}
Expand Down Expand Up @@ -703,7 +702,7 @@ def from_corpus(corpus, id2word=None):

if id2word is None:
# make sure length(result) == get_max_id(corpus) + 1
result.token2id = {unicode(i): i for i in xrange(max_id + 1)}
result.token2id = {unicode(i): i for i in range(max_id + 1)}
else:
# id=>word mapping given: simply copy it
result.token2id = {utils.to_unicode(token): idx for idx, token in iteritems(id2word)}
Expand Down
4 changes: 2 additions & 2 deletions gensim/corpora/lowcorpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from gensim import utils
from gensim.corpora import IndexedCorpus
from six import iterkeys
from six.moves import xrange, zip as izip
from six.moves import zip, range


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -109,7 +109,7 @@ def __init__(self, fname, id2word=None, line2words=split_on_space):
all_terms.update(word for word, wordCnt in doc)
all_terms = sorted(all_terms) # sort the list of all words; rank in that list = word's integer id
# build a mapping of word id(int) -> word (string)
self.id2word = dict(izip(xrange(len(all_terms)), all_terms))
self.id2word = dict(zip(range(len(all_terms)), all_terms))
else:
logger.info("using provided word mapping (%i ids)", len(id2word))
self.id2word = id2word
Expand Down
14 changes: 7 additions & 7 deletions gensim/corpora/sharded_corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
import scipy.sparse as sparse
import time

from six.moves import xrange
from six.moves import range

import gensim
from gensim.corpora import IndexedCorpus
Expand Down Expand Up @@ -432,7 +432,7 @@ def resize_shards(self, shardsize):
new_shard_names = []
new_offsets = [0]

for new_shard_idx in xrange(n_new_shards):
for new_shard_idx in range(n_new_shards):
new_start = shardsize * new_shard_idx
new_stop = new_start + shardsize

Expand Down Expand Up @@ -461,7 +461,7 @@ def resize_shards(self, shardsize):

# Move old shard files out, new ones in. Complicated due to possibility
# of exceptions.
old_shard_names = [self._shard_name(n) for n in xrange(self.n_shards)]
old_shard_names = [self._shard_name(n) for n in range(self.n_shards)]
try:
for old_shard_n, old_shard_name in enumerate(old_shard_names):
os.remove(old_shard_name)
Expand Down Expand Up @@ -644,7 +644,7 @@ def __getitem__(self, offset):
s_result = self.__add_to_slice(s_result, result_start, result_stop, shard_start, shard_stop)

# First and last get special treatment, these are in between
for shard_n in xrange(first_shard + 1, last_shard):
for shard_n in range(first_shard + 1, last_shard):
self.load_shard(shard_n)

result_start = result_stop
Expand Down Expand Up @@ -735,7 +735,7 @@ def row_sparse2gensim(row_idx, csr_matrix):
g_row = [(col_idx, csr_matrix[row_idx, col_idx]) for col_idx in indices]
return g_row

output = (row_sparse2gensim(i, result) for i in xrange(result.shape[0]))
output = (row_sparse2gensim(i, result) for i in range(result.shape[0]))

return output

Expand All @@ -745,7 +745,7 @@ def _getitem_dense2gensim(self, result):
output = gensim.matutils.full2sparse(result)
else:
output = (gensim.matutils.full2sparse(result[i])
for i in xrange(result.shape[0]))
for i in range(result.shape[0]))
return output

# Overriding the IndexedCorpus and other corpus superclass methods
Expand All @@ -754,7 +754,7 @@ def __iter__(self):
Yield dataset items one by one (generator).
"""
for i in xrange(len(self)):
for i in range(len(self)):
yield self[i]

def save(self, *args, **kwargs):
Expand Down
4 changes: 2 additions & 2 deletions gensim/corpora/ucicorpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from gensim.corpora import IndexedCorpus
from gensim.matutils import MmReader
from gensim.matutils import MmWriter
from six.moves import xrange
from six.moves import range


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -287,7 +287,7 @@ def save_corpus(fname, corpus, id2word=None, progress_cnt=10000, metadata=False)
fname_vocab = utils.smart_extension(fname, '.vocab')
logger.info("saving vocabulary of %i words to %s", num_terms, fname_vocab)
with utils.smart_open(fname_vocab, 'wb') as fout:
for featureid in xrange(num_terms):
for featureid in range(num_terms):
fout.write(utils.to_utf8("%s\n" % id2word.get(featureid, '---')))

logger.info("storing corpus in UCI Bag-of-Words format: %s", fname)
Expand Down
4 changes: 2 additions & 2 deletions gensim/interfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import logging

from gensim import utils, matutils
from six.moves import xrange
from six.moves import range


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -385,7 +385,7 @@ def __iter__(self):
# assumes `self.corpus` holds the index as a 2-d numpy array.
# this is true for MatrixSimilarity and SparseMatrixSimilarity, but
# may not be true for other (future) classes..?
for chunk_start in xrange(0, self.index.shape[0], self.chunksize):
for chunk_start in range(0, self.index.shape[0], self.chunksize):
# scipy.sparse doesn't allow slicing beyond real size of the matrix
# (unlike numpy). so, clip the end of the chunk explicitly to make
# scipy.sparse happy
Expand Down
8 changes: 4 additions & 4 deletions gensim/matutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from scipy.special import psi # gamma function utils

from six import iteritems, itervalues, string_types
from six.moves import xrange, zip as izip
from six.moves import zip, range


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -586,7 +586,7 @@ def __iter__(self):
Document in BoW format.
"""
for indprev, indnow in izip(self.sparse.indptr, self.sparse.indptr[1:]):
for indprev, indnow in zip(self.sparse.indptr, self.sparse.indptr[1:]):
yield list(zip(self.sparse.indices[indprev:indnow], self.sparse.data[indprev:indnow]))

def __len__(self):
Expand Down Expand Up @@ -1516,7 +1516,7 @@ def __iter__(self):

# return implicit (empty) documents between previous id and new id
# too, to keep consistent document numbering and corpus length
for previd in xrange(previd + 1, docid):
for previd in range(previd + 1, docid):
yield previd, []

# from now on start adding fields to a new document, with a new id
Expand All @@ -1531,7 +1531,7 @@ def __iter__(self):

# return empty documents between the last explicit document and the number
# of documents as specified in the header
for previd in xrange(previd + 1, self.num_docs):
for previd in range(previd + 1, self.num_docs):
yield previd, []

def docbyoffset(self, offset):
Expand Down
8 changes: 4 additions & 4 deletions gensim/models/atmodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@
from gensim.corpora import MmCorpus
from itertools import chain
from scipy.special import gammaln # gamma function utils
from six.moves import xrange
from six.moves import range
import six

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -482,7 +482,7 @@ def inference(self, chunk, author2doc, doc2author, rhot, collect_sstats=False, c
phinorm = self.compute_phinorm(expElogthetad, expElogbetad)

# Iterate between gamma and phi until convergence
for _ in xrange(self.iterations):
for _ in range(self.iterations):
lastgamma = tilde_gamma.copy()

# Update gamma.
Expand Down Expand Up @@ -699,7 +699,7 @@ def update(self, corpus=None, author2doc=None, doc2author=None, chunksize=None,
# Just keep training on the already available data.
# Assumes self.update() has been called before with input documents and corresponding authors.
assert self.total_docs > 0, 'update() was called with no documents to train on.'
train_corpus_idx = [d for d in xrange(self.total_docs)]
train_corpus_idx = [d for d in range(self.total_docs)]
num_input_authors = len(self.author2doc)
else:
if doc2author is None and author2doc is None:
Expand Down Expand Up @@ -816,7 +816,7 @@ def update(self, corpus=None, author2doc=None, doc2author=None, chunksize=None,
def rho():
return pow(offset + pass_ + (self.num_updates / chunksize), -decay)

for pass_ in xrange(passes):
for pass_ in range(passes):
if self.dispatcher:
logger.info('initializing %s workers', self.numworkers)
self.dispatcher.reset(self.state)
Expand Down
6 changes: 3 additions & 3 deletions gensim/models/base_any2vec.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
import logging
from timeit import default_timer
import threading
from six.moves import xrange
from six.moves import range
from six import itervalues, string_types
from gensim import matutils
from numpy import float32 as REAL, ones, random, dtype, zeros
Expand Down Expand Up @@ -284,7 +284,7 @@ def _job_producer(self, data_iterator, job_queue, cur_epoch=0, total_examples=No
)

# give the workers heads up that they can finish -- no more work!
for _ in xrange(self.workers):
for _ in range(self.workers):
job_queue.put(None)
logger.debug("job loop exiting, total %i jobs", job_no)

Expand Down Expand Up @@ -472,7 +472,7 @@ def _train_epoch(self, data_iterable, cur_epoch=0, total_examples=None, total_wo
threading.Thread(
target=self._worker_loop,
args=(job_queue, progress_queue,))
for _ in xrange(self.workers)
for _ in range(self.workers)
]

workers.append(threading.Thread(
Expand Down
6 changes: 3 additions & 3 deletions gensim/models/deprecated/doc2vec.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@
from gensim.models.deprecated.old_saveload import SaveLoad

from gensim import matutils # utility fnc for pickling, common scipy operations etc
from six.moves import xrange, zip
from six.moves import zip, range
from six import string_types, integer_types

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -490,7 +490,7 @@ def reset_weights(self, model):
self.doctag_syn0 = empty((length, model.vector_size), dtype=REAL)
self.doctag_syn0_lockf = ones((length,), dtype=REAL) # zeros suppress learning

for i in xrange(length):
for i in range(length):
# construct deterministic seed from index AND model seed
seed = "%d %s" % (model.seed, self.index_to_doctag(i))
self.doctag_syn0[i] = model.seeded_vector(seed)
Expand All @@ -510,7 +510,7 @@ def init_sims(self, replace=False):
if getattr(self, 'doctag_syn0norm', None) is None or replace:
logger.info("precomputing L2-norms of doc weight vectors")
if replace:
for i in xrange(self.doctag_syn0.shape[0]):
for i in range(self.doctag_syn0.shape[0]):
self.doctag_syn0[i, :] /= sqrt((self.doctag_syn0[i, :] ** 2).sum(-1))
self.doctag_syn0norm = self.doctag_syn0
else:
Expand Down
8 changes: 4 additions & 4 deletions gensim/models/deprecated/keyedvectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@
from gensim import utils, matutils # utility fnc for pickling, common scipy operations etc
from gensim.corpora.dictionary import Dictionary
from six import string_types, iteritems
from six.moves import xrange
from six.moves import range
from scipy import stats


Expand Down Expand Up @@ -239,7 +239,7 @@ def add_word(word, weights):

if binary:
binary_len = dtype(REAL).itemsize * vector_size
for _ in xrange(vocab_size):
for _ in range(vocab_size):
# mixed text and binary: read text first, then binary
word = []
while True:
Expand All @@ -254,7 +254,7 @@ def add_word(word, weights):
weights = fromstring(fin.read(binary_len), dtype=REAL)
add_word(word, weights)
else:
for line_no in xrange(vocab_size):
for line_no in range(vocab_size):
line = fin.readline()
if line == b'':
raise EOFError("unexpected end of input; is count incorrect or file otherwise damaged?")
Expand Down Expand Up @@ -1084,7 +1084,7 @@ def init_sims(self, replace=False):
if getattr(self, 'syn0norm', None) is None or replace:
logger.info("precomputing L2-norms of word weight vectors")
if replace:
for i in xrange(self.syn0.shape[0]):
for i in range(self.syn0.shape[0]):
self.syn0[i, :] /= sqrt((self.syn0[i, :] ** 2).sum(-1))
self.syn0norm = self.syn0
else:
Expand Down
Loading

0 comments on commit 3d5a21c

Please sign in to comment.