Skip to content

Commit

Permalink
Replace xrange with range (imported from six.moves)
Browse files Browse the repository at this point in the history
The code currently uses both range and xrange, which is inconsistent. Since
Python 2 support will end soon, it is better to make the code look more like
Python 3.
  • Loading branch information
horpto committed Nov 11, 2018
1 parent 7e4965e commit 7d173a0
Show file tree
Hide file tree
Showing 34 changed files with 133 additions and 133 deletions.
2 changes: 1 addition & 1 deletion docs/notebooks/Wordrank_comparisons.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1071,7 +1071,7 @@
" # sort analogies according to their mean frequences \n",
" copy_mean_freq = sorted(copy_mean_freq.items(), key=lambda x: x[1][1])\n",
" # prepare analogies buckets according to given size\n",
" for centre_p in xrange(bucket_size//2, len(copy_mean_freq), bucket_size):\n",
" for centre_p in range(bucket_size//2, len(copy_mean_freq), bucket_size):\n",
" bucket = copy_mean_freq[centre_p-bucket_size//2:centre_p+bucket_size//2]\n",
" b_acc = 0\n",
" # calculate current bucket accuracy with b_acc count\n",
Expand Down
6 changes: 3 additions & 3 deletions docs/notebooks/translation_matrix.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -417,7 +417,7 @@
"duration = []\n",
"sizeofword = []\n",
"\n",
"for idx in xrange(0, test_case):\n",
"for idx in range(0, test_case):\n",
" sub_pair = word_pair[: (idx + 1) * step]\n",
"\n",
" startTime = time.time()\n",
Expand Down Expand Up @@ -1450,15 +1450,15 @@
"small_train_docs = train_docs[:15000]\n",
"# train for small corpus\n",
"model1.build_vocab(small_train_docs)\n",
"for epoch in xrange(50):\n",
"for epoch in range(50):\n",
" shuffle(small_train_docs)\n",
" model1.train(small_train_docs, total_examples=len(small_train_docs), epochs=1)\n",
"model.save(\"small_doc_15000_iter50.bin\")\n",
"\n",
"large_train_docs = train_docs + test_docs\n",
"# train for large corpus\n",
"model2.build_vocab(large_train_docs)\n",
"for epoch in xrange(50):\n",
"for epoch in range(50):\n",
" shuffle(large_train_docs)\n",
" model2.train(large_train_docs, total_examples=len(train_docs), epochs=1)\n",
"# save the model\n",
Expand Down
6 changes: 3 additions & 3 deletions gensim/corpora/_mmreader.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ from __future__ import with_statement
from gensim import utils

from six import string_types
from six.moves import xrange
from six.moves import range
import logging

cimport cython
Expand Down Expand Up @@ -148,7 +148,7 @@ cdef class MmReader(object):

# return implicit (empty) documents between previous id and new id
# too, to keep consistent document numbering and corpus length
for previd in xrange(previd + 1, docid):
for previd in range(previd + 1, docid):
yield previd, []

# from now on start adding fields to a new document, with a new id
Expand All @@ -163,7 +163,7 @@ cdef class MmReader(object):

# return empty documents between the last explicit document and the number
# of documents as specified in the header
for previd in xrange(previd + 1, self.num_docs):
for previd in range(previd + 1, self.num_docs):
yield previd, []

def docbyoffset(self, offset):
Expand Down
4 changes: 2 additions & 2 deletions gensim/corpora/bleicorpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

from gensim import utils
from gensim.corpora import IndexedCorpus
from six.moves import xrange
from six.moves import range


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -159,7 +159,7 @@ def save_corpus(fname, corpus, id2word=None, metadata=False):
fname_vocab = utils.smart_extension(fname, '.vocab')
logger.info("saving vocabulary of %i words to %s", num_terms, fname_vocab)
with utils.smart_open(fname_vocab, 'wb') as fout:
for featureid in xrange(num_terms):
for featureid in range(num_terms):
fout.write(utils.to_utf8("%s\n" % id2word.get(featureid, '---')))

return offsets
Expand Down
7 changes: 3 additions & 4 deletions gensim/corpora/dictionary.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@
from gensim import utils

from six import PY3, iteritems, iterkeys, itervalues, string_types
from six.moves import xrange
from six.moves import zip as izip
from six.moves import zip, range

if sys.version_info[0] >= 3:
unicode = str
Expand Down Expand Up @@ -465,7 +464,7 @@ def compactify(self):
logger.debug("rebuilding dictionary, shrinking gaps")

# build mapping from old id -> new id
idmap = dict(izip(sorted(itervalues(self.token2id)), xrange(len(self.token2id))))
idmap = dict(zip(sorted(itervalues(self.token2id)), range(len(self.token2id))))

# reassign mappings to new ids
self.token2id = {token: idmap[tokenid] for token, tokenid in iteritems(self.token2id)}
Expand Down Expand Up @@ -702,7 +701,7 @@ def from_corpus(corpus, id2word=None):

if id2word is None:
# make sure length(result) == get_max_id(corpus) + 1
result.token2id = {unicode(i): i for i in xrange(max_id + 1)}
result.token2id = {unicode(i): i for i in range(max_id + 1)}
else:
# id=>word mapping given: simply copy it
result.token2id = {utils.to_unicode(token): idx for idx, token in iteritems(id2word)}
Expand Down
4 changes: 2 additions & 2 deletions gensim/corpora/lowcorpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from gensim import utils
from gensim.corpora import IndexedCorpus
from six import iterkeys
from six.moves import xrange, zip as izip
from six.moves import zip, range


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -109,7 +109,7 @@ def __init__(self, fname, id2word=None, line2words=split_on_space):
all_terms.update(word for word, wordCnt in doc)
all_terms = sorted(all_terms) # sort the list of all words; rank in that list = word's integer id
# build a mapping of word id(int) -> word (string)
self.id2word = dict(izip(xrange(len(all_terms)), all_terms))
self.id2word = dict(zip(range(len(all_terms)), all_terms))
else:
logger.info("using provided word mapping (%i ids)", len(id2word))
self.id2word = id2word
Expand Down
14 changes: 7 additions & 7 deletions gensim/corpora/sharded_corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
import scipy.sparse as sparse
import time

from six.moves import xrange
from six.moves import range

import gensim
from gensim.corpora import IndexedCorpus
Expand Down Expand Up @@ -432,7 +432,7 @@ def resize_shards(self, shardsize):
new_shard_names = []
new_offsets = [0]

for new_shard_idx in xrange(n_new_shards):
for new_shard_idx in range(n_new_shards):
new_start = shardsize * new_shard_idx
new_stop = new_start + shardsize

Expand Down Expand Up @@ -461,7 +461,7 @@ def resize_shards(self, shardsize):

# Move old shard files out, new ones in. Complicated due to possibility
# of exceptions.
old_shard_names = [self._shard_name(n) for n in xrange(self.n_shards)]
old_shard_names = [self._shard_name(n) for n in range(self.n_shards)]
try:
for old_shard_n, old_shard_name in enumerate(old_shard_names):
os.remove(old_shard_name)
Expand Down Expand Up @@ -644,7 +644,7 @@ def __getitem__(self, offset):
s_result = self.__add_to_slice(s_result, result_start, result_stop, shard_start, shard_stop)

# First and last get special treatment, these are in between
for shard_n in xrange(first_shard + 1, last_shard):
for shard_n in range(first_shard + 1, last_shard):
self.load_shard(shard_n)

result_start = result_stop
Expand Down Expand Up @@ -735,7 +735,7 @@ def row_sparse2gensim(row_idx, csr_matrix):
g_row = [(col_idx, csr_matrix[row_idx, col_idx]) for col_idx in indices]
return g_row

output = (row_sparse2gensim(i, result) for i in xrange(result.shape[0]))
output = (row_sparse2gensim(i, result) for i in range(result.shape[0]))

return output

Expand All @@ -745,7 +745,7 @@ def _getitem_dense2gensim(self, result):
output = gensim.matutils.full2sparse(result)
else:
output = (gensim.matutils.full2sparse(result[i])
for i in xrange(result.shape[0]))
for i in range(result.shape[0]))
return output

# Overriding the IndexedCorpus and other corpus superclass methods
Expand All @@ -754,7 +754,7 @@ def __iter__(self):
Yield dataset items one by one (generator).
"""
for i in xrange(len(self)):
for i in range(len(self)):
yield self[i]

def save(self, *args, **kwargs):
Expand Down
4 changes: 2 additions & 2 deletions gensim/corpora/ucicorpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from gensim.corpora import IndexedCorpus
from gensim.matutils import MmReader
from gensim.matutils import MmWriter
from six.moves import xrange
from six.moves import range


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -285,7 +285,7 @@ def save_corpus(fname, corpus, id2word=None, progress_cnt=10000, metadata=False)
fname_vocab = utils.smart_extension(fname, '.vocab')
logger.info("saving vocabulary of %i words to %s", num_terms, fname_vocab)
with utils.smart_open(fname_vocab, 'wb') as fout:
for featureid in xrange(num_terms):
for featureid in range(num_terms):
fout.write(utils.to_utf8("%s\n" % id2word.get(featureid, '---')))

logger.info("storing corpus in UCI Bag-of-Words format: %s", fname)
Expand Down
4 changes: 2 additions & 2 deletions gensim/interfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import logging

from gensim import utils, matutils
from six.moves import xrange
from six.moves import range


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -385,7 +385,7 @@ def __iter__(self):
# assumes `self.corpus` holds the index as a 2-d numpy array.
# this is true for MatrixSimilarity and SparseMatrixSimilarity, but
# may not be true for other (future) classes..?
for chunk_start in xrange(0, self.index.shape[0], self.chunksize):
for chunk_start in range(0, self.index.shape[0], self.chunksize):
# scipy.sparse doesn't allow slicing beyond real size of the matrix
# (unlike numpy). so, clip the end of the chunk explicitly to make
# scipy.sparse happy
Expand Down
8 changes: 4 additions & 4 deletions gensim/matutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from scipy.special import psi # gamma function utils

from six import iteritems, itervalues, string_types
from six.moves import xrange, zip as izip
from six.moves import zip, range


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -586,7 +586,7 @@ def __iter__(self):
Document in BoW format.
"""
for indprev, indnow in izip(self.sparse.indptr, self.sparse.indptr[1:]):
for indprev, indnow in zip(self.sparse.indptr, self.sparse.indptr[1:]):
yield list(zip(self.sparse.indices[indprev:indnow], self.sparse.data[indprev:indnow]))

def __len__(self):
Expand Down Expand Up @@ -1516,7 +1516,7 @@ def __iter__(self):

# return implicit (empty) documents between previous id and new id
# too, to keep consistent document numbering and corpus length
for previd in xrange(previd + 1, docid):
for previd in range(previd + 1, docid):
yield previd, []

# from now on start adding fields to a new document, with a new id
Expand All @@ -1531,7 +1531,7 @@ def __iter__(self):

# return empty documents between the last explicit document and the number
# of documents as specified in the header
for previd in xrange(previd + 1, self.num_docs):
for previd in range(previd + 1, self.num_docs):
yield previd, []

def docbyoffset(self, offset):
Expand Down
8 changes: 4 additions & 4 deletions gensim/models/atmodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@
from gensim.corpora import MmCorpus
from itertools import chain
from scipy.special import gammaln # gamma function utils
from six.moves import xrange
from six.moves import range
import six

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -482,7 +482,7 @@ def inference(self, chunk, author2doc, doc2author, rhot, collect_sstats=False, c
phinorm = self.compute_phinorm(expElogthetad, expElogbetad)

# Iterate between gamma and phi until convergence
for _ in xrange(self.iterations):
for _ in range(self.iterations):
lastgamma = tilde_gamma.copy()

# Update gamma.
Expand Down Expand Up @@ -699,7 +699,7 @@ def update(self, corpus=None, author2doc=None, doc2author=None, chunksize=None,
# Just keep training on the already available data.
# Assumes self.update() has been called before with input documents and corresponding authors.
assert self.total_docs > 0, 'update() was called with no documents to train on.'
train_corpus_idx = [d for d in xrange(self.total_docs)]
train_corpus_idx = [d for d in range(self.total_docs)]
num_input_authors = len(self.author2doc)
else:
if doc2author is None and author2doc is None:
Expand Down Expand Up @@ -816,7 +816,7 @@ def update(self, corpus=None, author2doc=None, doc2author=None, chunksize=None,
def rho():
return pow(offset + pass_ + (self.num_updates / chunksize), -decay)

for pass_ in xrange(passes):
for pass_ in range(passes):
if self.dispatcher:
logger.info('initializing %s workers', self.numworkers)
self.dispatcher.reset(self.state)
Expand Down
6 changes: 3 additions & 3 deletions gensim/models/base_any2vec.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
import logging
from timeit import default_timer
import threading
from six.moves import xrange
from six.moves import range
from six import itervalues, string_types
from gensim import matutils
from numpy import float32 as REAL, ones, random, dtype, zeros
Expand Down Expand Up @@ -284,7 +284,7 @@ def _job_producer(self, data_iterator, job_queue, cur_epoch=0, total_examples=No
)

# give the workers heads up that they can finish -- no more work!
for _ in xrange(self.workers):
for _ in range(self.workers):
job_queue.put(None)
logger.debug("job loop exiting, total %i jobs", job_no)

Expand Down Expand Up @@ -472,7 +472,7 @@ def _train_epoch(self, data_iterable, cur_epoch=0, total_examples=None, total_wo
threading.Thread(
target=self._worker_loop,
args=(job_queue, progress_queue,))
for _ in xrange(self.workers)
for _ in range(self.workers)
]

workers.append(threading.Thread(
Expand Down
6 changes: 3 additions & 3 deletions gensim/models/deprecated/doc2vec.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@
from gensim.models.deprecated.old_saveload import SaveLoad

from gensim import matutils # utility fnc for pickling, common scipy operations etc
from six.moves import xrange, zip
from six.moves import zip, range
from six import string_types, integer_types

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -490,7 +490,7 @@ def reset_weights(self, model):
self.doctag_syn0 = empty((length, model.vector_size), dtype=REAL)
self.doctag_syn0_lockf = ones((length,), dtype=REAL) # zeros suppress learning

for i in xrange(length):
for i in range(length):
# construct deterministic seed from index AND model seed
seed = "%d %s" % (model.seed, self.index_to_doctag(i))
self.doctag_syn0[i] = model.seeded_vector(seed)
Expand All @@ -510,7 +510,7 @@ def init_sims(self, replace=False):
if getattr(self, 'doctag_syn0norm', None) is None or replace:
logger.info("precomputing L2-norms of doc weight vectors")
if replace:
for i in xrange(self.doctag_syn0.shape[0]):
for i in range(self.doctag_syn0.shape[0]):
self.doctag_syn0[i, :] /= sqrt((self.doctag_syn0[i, :] ** 2).sum(-1))
self.doctag_syn0norm = self.doctag_syn0
else:
Expand Down
8 changes: 4 additions & 4 deletions gensim/models/deprecated/keyedvectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@
from gensim import utils, matutils # utility fnc for pickling, common scipy operations etc
from gensim.corpora.dictionary import Dictionary
from six import string_types, iteritems
from six.moves import xrange
from six.moves import range
from scipy import stats


Expand Down Expand Up @@ -239,7 +239,7 @@ def add_word(word, weights):

if binary:
binary_len = dtype(REAL).itemsize * vector_size
for _ in xrange(vocab_size):
for _ in range(vocab_size):
# mixed text and binary: read text first, then binary
word = []
while True:
Expand All @@ -254,7 +254,7 @@ def add_word(word, weights):
weights = fromstring(fin.read(binary_len), dtype=REAL)
add_word(word, weights)
else:
for line_no in xrange(vocab_size):
for line_no in range(vocab_size):
line = fin.readline()
if line == b'':
raise EOFError("unexpected end of input; is count incorrect or file otherwise damaged?")
Expand Down Expand Up @@ -1084,7 +1084,7 @@ def init_sims(self, replace=False):
if getattr(self, 'syn0norm', None) is None or replace:
logger.info("precomputing L2-norms of word weight vectors")
if replace:
for i in xrange(self.syn0.shape[0]):
for i in range(self.syn0.shape[0]):
self.syn0[i, :] /= sqrt((self.syn0[i, :] ** 2).sum(-1))
self.syn0norm = self.syn0
else:
Expand Down
Loading

0 comments on commit 7d173a0

Please sign in to comment.