diff --git a/docs/src/apiref.rst b/docs/src/apiref.rst index 66fe192b07..ffb19b9c5e 100644 --- a/docs/src/apiref.rst +++ b/docs/src/apiref.rst @@ -45,6 +45,7 @@ Modules: models/word2vec models/keyedvectors models/doc2vec + models/doc2vec_inner models/fasttext models/phrases models/poincare @@ -64,6 +65,7 @@ Modules: models/deprecated/word2vec models/deprecated/keyedvectors models/deprecated/fasttext_wrapper + models/base_any2vec similarities/docsim similarities/index sklearn_api/atmodel diff --git a/docs/src/models/base_any2vec.rst b/docs/src/models/base_any2vec.rst new file mode 100644 index 0000000000..e6685cda66 --- /dev/null +++ b/docs/src/models/base_any2vec.rst @@ -0,0 +1,10 @@ +:mod:`models.base_any2vec` -- Base classes for any2vec models +============================================================= + +.. automodule:: gensim.models.base_any2vec + :synopsis: Base classes for any2vec models + :members: + :inherited-members: + :special-members: __getitem__ + :undoc-members: + :show-inheritance: diff --git a/docs/src/models/doc2vec_inner.rst b/docs/src/models/doc2vec_inner.rst new file mode 100644 index 0000000000..1f4ff1d5a0 --- /dev/null +++ b/docs/src/models/doc2vec_inner.rst @@ -0,0 +1,9 @@ +:mod:`models.doc2vec_inner` -- Cython job for training Doc2Vec model +==================================================================== + +.. automodule:: gensim.models.doc2vec_inner + :synopsis: Cython job for training Doc2Vec model + :members: + :inherited-members: + :undoc-members: + :show-inheritance: diff --git a/gensim/models/base_any2vec.py b/gensim/models/base_any2vec.py index e6a31263ec..a11d2074c8 100644 --- a/gensim/models/base_any2vec.py +++ b/gensim/models/base_any2vec.py @@ -5,7 +5,32 @@ # Copyright (C) 2018 RaRe Technologies s.r.o. 
# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html -"""Contains base classes required for implementing any2vec algorithms.""" +"""This module contains base classes required for implementing any2vec algorithms. + +The class hierarchy is designed to facilitate adding more concrete implementations for creating embeddings. +In the most general case, the purpose of this class is to transform an arbitrary representation to a numerical vector +(embedding). This is represented by the base :class:`~gensim.models.base_any2vec.BaseAny2VecModel`. The input space in +most cases (in the NLP field at least) is plain text. For this reason, we enrich the class hierarchy with the abstract +:class:`~gensim.models.base_any2vec.BaseWordEmbeddingsModel` to be used as a base for models where the input +space is text. + +Notes +----- +Even though this is the usual case, not all embeddings transform text. Check the next section for +concrete examples. + +See Also +-------- +:class:`~gensim.models.word2vec.Word2Vec`. + Word2Vec model - embeddings for words. +:class:`~gensim.models.fasttext.FastText`. + FastText model - embeddings for words (ngram-based). +:class:`~gensim.models.doc2vec.Doc2Vec`. + Doc2Vec model - embeddings for documents. +:class:`~gensim.models.poincare.PoincareModel` + Poincare model - embeddings for graphs. + +""" from gensim import utils import logging from timeit import default_timer @@ -28,17 +53,39 @@ class BaseAny2VecModel(utils.SaveLoad): """Base class for training, using and evaluating any2vec model. - Contains implementation for multi-threaded training. - """ + Contains implementation for multi-threaded training. The purpose of this class is to provide a + reference interface for concrete embedding implementations, whether the input space is a corpus + of words, documents or anything else. At the same time, functionality that we expect to be common + for those implementations is provided here to avoid code duplication. 
+ + In the special but usual case where the input space consists of words, a more specialized layer + is provided, consider inheriting from :class:`~gensim.models.base_any2vec.BaseWordEmbeddingsModel` + + Notes + ----- + A subclass should initialize the following attributes: + * self.kv - keyed vectors in model (see :class:`~gensim.models.keyedvectors.Word2VecKeyedVectors` as example) + * self.vocabulary - vocabulary (see :class:`~gensim.models.word2vec.Word2VecVocab` as example) + * self.trainables - internal matrices (see :class:`~gensim.models.word2vec.Word2VecTrainables` as example) + + """ def __init__(self, workers=3, vector_size=100, epochs=5, callbacks=(), batch_words=10000): - """Initialize model parameters. + """ - A subclass should initialize the following attributes: - - self.kv (instance of concrete implementation of `BaseKeyedVectors` interface) - - self.vocabulary (instance of concrete implementation of `BaseVocabBuilder` abstract class) - - self.trainables (instance of concrete implementation of `BaseTrainables` abstract class) + Parameters + ---------- + workers : int, optional + Number of working threads, used for multithreading. + vector_size : int, optional + Dimensionality of the feature vectors. + epochs : int, optional + Number of iterations (epochs) of training through the corpus. + callbacks : list of :class:`~gensim.models.callbacks.CallbackAny2Vec`, optional + List of callbacks that need to be executed/run at specific stages during training. + batch_words : int, optional + Number of words to be processed by a single job. 
""" self.vector_size = int(vector_size) @@ -55,7 +102,7 @@ def _get_job_params(self, cur_epoch): raise NotImplementedError() def _set_train_params(self, **kwargs): - """Set model parameters required for training""" + """Set model parameters required for training.""" raise NotImplementedError() def _update_job_params(self, job_params, epoch_progress, cur_epoch): @@ -83,7 +130,24 @@ def _check_training_sanity(self, epochs=None, total_examples=None, total_words=N raise NotImplementedError() def _worker_loop(self, job_queue, progress_queue): - """Train the model, lifting lists of data from the job_queue.""" + """Train the model, lifting lists of data from the queue. + + This function will be called in parallel by multiple workers (threads or processes) to make + optimal use of multicore machines. + + Parameters + ---------- + job_queue : Queue of (list of object, (str, int)) + A queue of jobs still to be processed. The worker will take up jobs from this queue. + Each job is represented by a tuple where the first element is the corpus chunk to be processed and + the second is the dictionary of parameters. + progress_queue : Queue of (int, int, int) + A queue of progress reports. Each report is represented as a tuple of these 3 elements: + * size of data chunk processed, for example number of sentences in the corpus chunk. + * Effective word count used in training (after ignoring unknown words and trimming the sentence length). + * Total word count used in training. + + """ thread_private_mem = self._get_thread_working_mem() jobs_processed = 0 while True: @@ -106,7 +170,30 @@ def _worker_loop(self, job_queue, progress_queue): logger.debug("worker exiting, processed %i jobs", jobs_processed) def _job_producer(self, data_iterator, job_queue, cur_epoch=0, total_examples=None, total_words=None): - """Fill jobs queue using the input `data_iterator`.""" + """Fill the jobs queue using the data found in the input stream. 
+ + Each job is represented by a tuple where the first element is the corpus chunk to be processed and + the second is the dictionary of parameters. + + Parameters + ---------- + data_iterator : iterable of list of object + The input dataset. This will be split in chunks and these chunks will be pushed to the queue. + job_queue : Queue of (list of object, dict of (str, int)) + A queue of jobs still to be processed. The worker will take up jobs from this queue. + Each job is represented by a tuple where the first element is the corpus chunk to be processed and + the second is the dictionary of parameters. + cur_epoch : int, optional + The current training epoch, needed to compute the training parameters for each job. + For example in many implementations the learning rate would be dropping with the number of epochs. + total_examples : int, optional + Count of objects in the `data_iterator`. In the usual case this would correspond to the number of sentences + in a corpus. Used to log progress. + total_words : int, optional + Count of total objects in `data_iterator`. In the usual case this would correspond to the number of raw + words in a corpus. Used to log progress. + + """ job_batch, batch_size = [], 0 pushed_words, pushed_examples = 0, 0 next_job_params = self._get_job_params(cur_epoch) @@ -166,6 +253,40 @@ def _log_train_end(self, raw_word_count, trained_word_count, total_elapsed, job_ def _log_epoch_progress(self, progress_queue, job_queue, cur_epoch=0, total_examples=None, total_words=None, report_delay=1.0): + """Get the progress report for a single training epoch. + + Parameters + ---------- + progress_queue : Queue of (int, int, int) + A queue of progress reports. Each report is represented as a tuple of these 3 elements: + * size of data chunk processed, for example number of sentences in the corpus chunk. + * Effective word count used in training (after ignoring unknown words and trimming the sentence length). + * Total word count used in training. 
+ job_queue : Queue of (list of object, dict of (str, int)) + A queue of jobs still to be processed. The worker will take up jobs from this queue. + Each job is represented by a tuple where the first element is the corpus chunk to be processed and + the second is the dictionary of parameters. + cur_epoch : int, optional + The current training epoch, needed to compute the training parameters for each job. + For example in many implementations the learning rate would be dropping with the number of epochs. + total_examples : int, optional + Count of objects in the `data_iterator`. In the usual case this would correspond to the number of sentences + in a corpus. Used to log progress. + total_words : int, optional + Count of total objects in `data_iterator`. In the usual case this would correspond to the number of raw + words in a corpus. Used to log progress. + report_delay : float, optional + Number of seconds between two consecutive progress report messages in the logger. + + Returns + ------- + (int, int, int) + The epoch report consisting of three elements: + * size of data chunk processed, for example number of sentences in the corpus chunk. + * Effective word count used in training (after ignoring unknown words and trimming the sentence length). + * Total word count used in training. + + """ example_count, trained_word_count, raw_word_count = 0, 0, 0 start, next_report = default_timer() - 0.00001, 1.0 job_tally = 0 @@ -202,7 +323,35 @@ def _log_epoch_progress(self, progress_queue, job_queue, cur_epoch=0, total_exam def _train_epoch(self, data_iterable, cur_epoch=0, total_examples=None, total_words=None, queue_factor=2, report_delay=1.0): - """Train one epoch.""" + """Train the model for a single epoch. + + Parameters + ---------- + data_iterable : iterable of list of object + The input corpus. This will be split in chunks and these chunks will be pushed to the queue. 
+ cur_epoch : int, optional + The current training epoch, needed to compute the training parameters for each job. + For example in many implementations the learning rate would be dropping with the number of epochs. + total_examples : int, optional + Count of objects in the `data_iterable`. In the usual case this would correspond to the number of sentences + in a corpus, used to log progress. + total_words : int, optional + Count of total objects in `data_iterable`. In the usual case this would correspond to the number of raw + words in a corpus, used to log progress. + queue_factor : int, optional + Multiplier for size of queue -> size = number of workers * queue_factor. + report_delay : float, optional + Number of seconds between two consecutive progress report messages in the logger. + + Returns + ------- + (int, int, int) + The training report for this epoch consisting of three elements: + * Size of data chunk processed, for example number of sentences in the corpus chunk. + * Effective word count used in training (after ignoring unknown words and trimming the sentence length). + * Total word count used in training. + + """ job_queue = Queue(maxsize=queue_factor * self.workers) progress_queue = Queue(maxsize=(queue_factor + 1) * self.workers) @@ -230,7 +379,37 @@ def _train_epoch(self, data_iterable, cur_epoch=0, total_examples=None, def train(self, data_iterable, epochs=None, total_examples=None, total_words=None, queue_factor=2, report_delay=1.0, callbacks=(), **kwargs): - """Handle multi-worker training.""" + """Train the model for all epochs using multiple workers. + + Parameters + ---------- + data_iterable : iterable of list of object + The input corpus. This will be split in chunks and these chunks will be pushed to the queue. + epochs : int, optional + Number of epochs (training iterations over the whole input) of training. + total_examples : int, optional + Count of objects in the `data_iterable`. 
In the usual case this would correspond to the number of sentences + in a corpus, used to log progress. + total_words : int, optional + Count of total objects in `data_iterable`. In the usual case this would correspond to the number of raw + words in a corpus, used to log progress. + queue_factor : int, optional + Multiplier for size of queue -> size = number of workers * queue_factor. + report_delay : float, optional + Number of seconds between two consecutive progress report messages in the logger. + callbacks : list of :class:`~gensim.models.callbacks.CallbackAny2Vec`, optional + List of callbacks that need to be executed/run at specific stages during training. + **kwargs : object + Additional key word parameters for the specific model inheriting from this class. + + Returns + ------- + (int, int) + The total training report consisting of two elements: + * size of total data processed, for example number of sentences in the whole corpus. + * Effective word count used in training (after ignoring unknown words and trimming the sentence length). + + """ self._set_train_params(**kwargs) if callbacks: self.callbacks = callbacks @@ -275,19 +454,67 @@ def train(self, data_iterable, epochs=None, total_examples=None, @classmethod def load(cls, fname_or_handle, **kwargs): + """Load a previously saved object (using :meth:`gensim.models.base_any2vec.BaseAny2VecModel.save`) from file. + + Parameters + ---------- + fname_or_handle : {str, file-like object} + Path to file that contains needed object or handle to the opened file. + **kwargs : object + Key word arguments propagated to :meth:`~gensim.utils.SaveLoad.load`. + + See Also + -------- + :meth:`~gensim.models.base_any2vec.BaseAny2VecModel.save` + Method for saving a model. + + Returns + ------- + object + Object loaded from `fname_or_handle`. + + Raises + ------ + IOError + When methods are called on instance (should be called from class). 
+ + """ return super(BaseAny2VecModel, cls).load(fname_or_handle, **kwargs) def save(self, fname_or_handle, **kwargs): + """Save the object to file. + + Parameters + ---------- + fname_or_handle : {str, file-like object} + Path to file where the model will be persisted. + **kwargs : object + Key word arguments propagated to :meth:`~gensim.utils.SaveLoad.save`. + + See Also + -------- + :meth:`~gensim.models.base_any2vec.BaseAny2VecModel.load` + Method for loading a model saved with the current method. + + """ super(BaseAny2VecModel, self).save(fname_or_handle, **kwargs) class BaseWordEmbeddingsModel(BaseAny2VecModel): - """ - Base class containing common methods for training, using & evaluating word embeddings learning models. - For example - `Word2Vec`, `FastText`, etc. + """Base class containing common methods for training, using & evaluating word embeddings learning models. + + See Also + -------- + :class:`~gensim.models.word2vec.Word2Vec`. + Word2Vec model - embeddings for words. + :class:`~gensim.models.fasttext.FastText`. + FastText model - embeddings for words (ngram-based). + :class:`~gensim.models.doc2vec.Doc2Vec`. + Doc2Vec model - embeddings for documents. + :class:`~gensim.models.poincare.PoincareModel` + Poincare model - embeddings for graphs. """ - def _clear_post_train(self): raise NotImplementedError() @@ -300,6 +527,72 @@ def _set_train_params(self, **kwargs): def __init__(self, sentences=None, workers=3, vector_size=100, epochs=5, callbacks=(), batch_words=10000, trim_rule=None, sg=0, alpha=0.025, window=5, seed=1, hs=0, negative=5, cbow_mean=1, min_alpha=0.0001, compute_loss=False, fast_version=0, **kwargs): + """ + + Parameters + ---------- + sentences : iterable of list of str, optional + Can be simply a list of lists of tokens, but for larger corpora, + consider an iterable that streams the sentences directly from disk/network. 
+ See :class:`~gensim.models.word2vec.BrownCorpus`, :class:`~gensim.models.word2vec.Text8Corpus` + or :class:`~gensim.models.word2vec.LineSentence` for such examples. + workers : int, optional + Number of working threads, used for multithreading. + vector_size : int, optional + Dimensionality of the feature vectors. + epochs : int, optional + Number of iterations (epochs) of training through the corpus. + callbacks : list of :class:`~gensim.models.callbacks.CallbackAny2Vec`, optional + List of callbacks that need to be executed/run at specific stages during training. + batch_words : int, optional + Number of words to be processed by a single job. + trim_rule : function, optional + Vocabulary trimming rule, specifies whether certain words should remain in the vocabulary, + be trimmed away, or handled using the default (discard if word count < min_count). + Can be None (min_count will be used, look to :func:`~gensim.utils.keep_vocab_item`), + or a callable that accepts parameters (word, count, min_count) and returns either + :attr:`gensim.utils.RULE_DISCARD`, :attr:`gensim.utils.RULE_KEEP` or :attr:`gensim.utils.RULE_DEFAULT`. + The rule, if given, is only used to prune vocabulary during current method call and is not stored as part + of the model. + + The input parameters are of the following types: + * `word` (str) - the word we are examining + * `count` (int) - the word's frequency count in the corpus + * `min_count` (int) - the minimum count threshold. + + sg : {1, 0}, optional + Defines the training algorithm. If 1, skip-gram is used, otherwise, CBOW is employed. + alpha : float, optional + The beginning learning rate. This will linearly reduce with iterations until it reaches `min_alpha`. + window : int, optional + The maximum distance between the current and predicted word within a sentence. + seed : int, optional + Seed for the random number generator. Initial vectors for each word are seeded with a hash of + the concatenation of word + `str(seed)`. 
+ Note that for a fully deterministically-reproducible run, you must also limit the model to a single worker + thread (`workers=1`), to eliminate ordering jitter from OS thread scheduling. + In Python 3, reproducibility between interpreter launches also requires use of the `PYTHONHASHSEED` + environment variable to control hash randomization. + hs : {1,0}, optional + If 1, hierarchical softmax will be used for model training. + If set to 0, and `negative` is non-zero, negative sampling will be used. + negative : int, optional + If > 0, negative sampling will be used, the int for negative specifies how many "noise words" + should be drawn (usually between 5-20). + If set to 0, no negative sampling is used. + cbow_mean : {1,0}, optional + If 0, use the sum of the context word vectors. If 1, use the mean, only applies when cbow is used. + min_alpha : float, optional + Final learning rate. Drops linearly with the number of iterations from `alpha`. + compute_loss : bool, optional + If True, loss will be computed while training the Word2Vec model and stored in + :attr:`~gensim.models.base_any2vec.BaseWordEmbeddingsModel.running_training_loss` attribute. + fast_version : {-1, 1}, optional + Whether or not the fast cython implementation of the internal training methods is available. 1 means it is. + **kwargs : object + Key word arguments needed to allow children classes to accept more arguments. + + """ self.sg = int(sg) if vector_size % 4 != 0: logger.warning("consider setting layer size to a multiple of 4 for greater performance") @@ -455,25 +748,51 @@ def cum_table(self): del self.vocabulary.cum_table def __str__(self): + """Get a human readable representation of the object. + + Returns + ------- + str + A human readable string containing the class name, as well as the size of dictionary, number of + features and starting learning rate used by the object. 
+ + """ return "%s(vocab=%s, size=%s, alpha=%s)" % ( self.__class__.__name__, len(self.wv.index2word), self.vector_size, self.alpha ) def build_vocab(self, sentences, update=False, progress_per=10000, keep_raw_vocab=False, trim_rule=None, **kwargs): """Build vocabulary from a sequence of sentences (can be a once-only generator stream). - Each sentence is a iterable of iterables (can simply be a list of unicode strings too). Parameters ---------- - sentences : iterable of iterables - The `sentences` iterable can be simply a list of lists of tokens, but for larger corpora, + sentences : iterable of list of str + Can be simply a list of lists of tokens, but for larger corpora, consider an iterable that streams the sentences directly from disk/network. See :class:`~gensim.models.word2vec.BrownCorpus`, :class:`~gensim.models.word2vec.Text8Corpus` - or :class:`~gensim.models.word2vec.LineSentence` in :mod:`~gensim.models.word2vec` module for such examples. - update : bool + or :class:`~gensim.models.word2vec.LineSentence` module for such examples. + update : bool, optional If true, the new words in `sentences` will be added to model's vocab. - progress_per : int + progress_per : int, optional Indicates how many words to process before showing/updating the progress. + keep_raw_vocab : bool, optional + If False, the raw vocabulary will be deleted after the scaling is done to free up RAM. + trim_rule : function, optional + Vocabulary trimming rule, specifies whether certain words should remain in the vocabulary, + be trimmed away, or handled using the default (discard if word count < min_count). + Can be None (min_count will be used, look to :func:`~gensim.utils.keep_vocab_item`), + or a callable that accepts parameters (word, count, min_count) and returns either + :attr:`gensim.utils.RULE_DISCARD`, :attr:`gensim.utils.RULE_KEEP` or :attr:`gensim.utils.RULE_DEFAULT`. 
+ The rule, if given, is only used to prune vocabulary during current method call and is not stored as part + of the model. + + The input parameters are of the following types: + * `word` (str) - the word we are examining + * `count` (int) - the word's frequency count in the corpus + * `min_count` (int) - the minimum count threshold. + + **kwargs : object + Key word arguments propagated to `self.vocabulary.prepare_vocab` """ total_words, corpus_count = self.vocabulary.scan_vocab( @@ -487,34 +806,31 @@ def build_vocab(self, sentences, update=False, progress_per=10000, keep_raw_voca def build_vocab_from_freq(self, word_freq, keep_raw_vocab=False, corpus_count=None, trim_rule=None, update=False): """Build vocabulary from a dictionary of word frequencies. - Build model vocabulary from a passed dictionary that contains (word,word count). - Words must be of type unicode strings. Parameters ---------- - word_freq : dict - Word,Word_Count dictionary. - keep_raw_vocab : bool - If not true, delete the raw vocabulary after the scaling is done and free up RAM. - corpus_count : int + word_freq : dict of (str, int) + A mapping from a word in the vocabulary to its frequency count. + keep_raw_vocab : bool, optional + If False, delete the raw vocabulary after the scaling is done to free up RAM. + corpus_count : int, optional Even if no corpus is provided, this argument can set corpus_count explicitly. - trim_rule : function + trim_rule : function, optional Vocabulary trimming rule, specifies whether certain words should remain in the vocabulary, be trimmed away, or handled using the default (discard if word count < min_count). Can be None (min_count will be used, look to :func:`~gensim.utils.keep_vocab_item`), or a callable that accepts parameters (word, count, min_count) and returns either :attr:`gensim.utils.RULE_DISCARD`, :attr:`gensim.utils.RULE_KEEP` or :attr:`gensim.utils.RULE_DEFAULT`. 
- Note: The rule, if given, is only used to prune vocabulary during build_vocab() and is not stored as part + The rule, if given, is only used to prune vocabulary during current method call and is not stored as part of the model. - update : bool - If true, the new provided words in `word_freq` dict will be added to model's vocab. - Examples - -------- - >>> from gensim.models import Word2Vec - >>> - >>> model= Word2Vec() - >>> model.build_vocab_from_freq({"Word1": 15, "Word2": 20}) + The input parameters are of the following types: + * `word` (str) - the word we are examining + * `count` (int) - the word's frequency count in the corpus + * `min_count` (int) - the minimum count threshold. + + update : bool, optional + If true, the new provided words in `word_freq` dict will be added to model's vocab. """ logger.info("Processing provided word frequencies") @@ -526,7 +842,7 @@ def build_vocab_from_freq(self, word_freq, keep_raw_vocab=False, corpus_count=No len(raw_vocab), sum(itervalues(raw_vocab)) ) - # Since no sentences are provided, this is to control the corpus_count + # Since no sentences are provided, this is to control the corpus_count. self.corpus_count = corpus_count or 0 self.vocabulary.raw_vocab = raw_vocab @@ -539,7 +855,21 @@ def build_vocab_from_freq(self, word_freq, keep_raw_vocab=False, corpus_count=No self.hs, self.negative, self.wv, update=update, vocabulary=self.vocabulary) # build tables & arrays def estimate_memory(self, vocab_size=None, report=None): - """Estimate required memory for a model using current settings and provided vocabulary size.""" + """Estimate required memory for a model using current settings and provided vocabulary size. + + Parameters + ---------- + vocab_size : int, optional + Number of unique tokens in the vocabulary + report : dict of (str, int), optional + A dictionary from string representations of the model's memory consuming members to their size in bytes. 
+ + Returns + ------- + dict of (str, int) + A dictionary from string representations of the model's memory consuming members to their size in bytes. + + """ vocab_size = vocab_size or len(self.wv.vocab) report = report or {} report['vocab'] = vocab_size * (700 if self.hs else 500) @@ -558,6 +888,43 @@ def estimate_memory(self, vocab_size=None, report=None): def train(self, sentences, total_examples=None, total_words=None, epochs=None, start_alpha=None, end_alpha=None, word_count=0, queue_factor=2, report_delay=1.0, compute_loss=False, callbacks=()): + """Train the model. If the hyper-parameters are passed, they override the ones set in the constructor. + + Parameters + ---------- + sentences : iterable of list of str + Can be simply a list of lists of tokens, but for larger corpora, + consider an iterable that streams the sentences directly from disk/network. + See :class:`~gensim.models.word2vec.BrownCorpus`, :class:`~gensim.models.word2vec.Text8Corpus` + or :class:`~gensim.models.word2vec.LineSentence` module for such examples. + total_examples : int, optional + Count of sentences. + total_words : int, optional + Count of raw words in sentences. + epochs : int, optional + Number of iterations (epochs) over the corpus. + start_alpha : float, optional + Initial learning rate. + end_alpha : float, optional + Final learning rate. Drops linearly with the number of iterations from `start_alpha`. + word_count : int, optional + Count of words already trained. Leave this to 0 for the usual case of training on all words in sentences. + queue_factor : int, optional + Multiplier for size of queue -> size = number of workers * queue_factor. + report_delay : float, optional + Seconds to wait before reporting progress. + compute_loss : bool, optional + If True, loss will be computed while training the Word2Vec model and stored in + :attr:`~gensim.models.base_any2vec.BaseWordEmbeddingsModel.running_training_loss`. 
+ callbacks : list of :class:`~gensim.models.callbacks.CallbackAny2Vec`, optional + List of callbacks that need to be executed/run at specific stages during training. + + Returns + ------- + (int, int) + Tuple of (effective word count after ignoring unknown words and sentence length trimming, total word count). + + """ self.alpha = start_alpha or self.alpha self.min_alpha = end_alpha or self.min_alpha @@ -569,11 +936,40 @@ def train(self, sentences, total_examples=None, total_words=None, queue_factor=queue_factor, report_delay=report_delay, compute_loss=compute_loss, callbacks=callbacks) def _get_job_params(self, cur_epoch): - """Get the parameter required for each batch.""" + """Get the learning rate used in the current epoch. + + Parameters + ---------- + cur_epoch : int + Current iteration through the corpus. + + Returns + ------- + float + The learning rate for this epoch (it is linearly reduced with epochs from `self.alpha` to `self.min_alpha`). + + """ alpha = self.alpha - ((self.alpha - self.min_alpha) * float(cur_epoch) / self.epochs) return alpha def _update_job_params(self, job_params, epoch_progress, cur_epoch): + """Get the correct learning rate for the next iteration. + + Parameters + ---------- + job_params : dict of (str, obj) + UNUSED. + epoch_progress : float + Ratio of finished work in the current epoch. + cur_epoch : int + Number of current iteration. + + Returns + ------- + float + The learning rate to be used in the next training epoch. + + """ start_alpha = self.alpha end_alpha = self.min_alpha progress = (cur_epoch + epoch_progress) / self.epochs @@ -583,15 +979,60 @@ def _update_job_params(self, job_params, epoch_progress, cur_epoch): return next_alpha def _get_thread_working_mem(self): + """Allocate the private work memory used by a single worker thread. + + Returns + ------- + (np.ndarray, np.ndarray) + Each worker thread's private work memory. 
+ + """ work = matutils.zeros_aligned(self.trainables.layer1_size, dtype=REAL) # per-thread private work memory neu1 = matutils.zeros_aligned(self.trainables.layer1_size, dtype=REAL) return work, neu1 def _raw_word_count(self, job): - """Get the number of words in a given job.""" + """Get the number of words in a given job. + + Parameters + ---------- + job : iterable of list of str + The corpus chunk processed in a single batch. + + Returns + ------- + int + Number of raw words in the corpus chunk. + + """ return sum(len(sentence) for sentence in job) def _check_training_sanity(self, epochs=None, total_examples=None, total_words=None, **kwargs): + """Check whether the training parameters make sense. + + Called right before training starts in :meth:`~gensim.models.base_any2vec.BaseWordEmbeddingsModel.train` + and raises warnings or errors depending on the severity of the issue in case an inconsistent parameter + combination is detected. + + Parameters + ---------- + epochs : int, optional + Number of training epochs. Must have a (non-None) value. + total_examples : int, optional + Number of documents in the corpus. Either `total_examples` or `total_words` **must** be supplied. + total_words : int, optional + Number of words in the corpus. Either `total_examples` or `total_words` **must** be supplied. + **kwargs : object + Unused. Present to preserve signature among base and inherited implementations. + + Raises + ------ + RuntimeError + If one of the required training pre/post processing steps has not been performed. + ValueError + If the combination of input parameters is inconsistent. 
+ + """ if self.alpha > self.min_alpha_yet_reached: logger.warning("Effective 'alpha' higher than previous training cycles") if self.model_trimmed_post_training: @@ -626,6 +1067,35 @@ def _check_training_sanity(self, epochs=None, total_examples=None, total_words=N @classmethod def load(cls, *args, **kwargs): + """Load a previously saved object (using :meth:`~gensim.models.base_any2vec.BaseWordEmbeddingsModel.save`) from file. + + Also initializes extra instance attributes in case the loaded model does not include them. + `*args` or `**kwargs` **MUST** include the fname argument (path to saved file). + See :meth:`~gensim.utils.SaveLoad.load`. + + Parameters + ---------- + *args : object + Positional arguments passed to :meth:`~gensim.utils.SaveLoad.load`. + **kwargs : object + Key word arguments passed to :meth:`~gensim.utils.SaveLoad.load`. + + See Also + -------- + :meth:`~gensim.models.base_any2vec.BaseWordEmbeddingsModel.save` + Method for save a model. + + Returns + ------- + :class:`~gensim.models.base_any2vec.BaseWordEmbeddingsModel` + Model loaded from disk. + + Raises + ------ + IOError + When methods are called on instance (should be called from class). + + """ model = super(BaseWordEmbeddingsModel, cls).load(*args, **kwargs) if model.negative and hasattr(model.wv, 'index2word'): model.vocabulary.make_cum_table(model.wv) # rebuild cum_table from vocabulary @@ -642,6 +1112,35 @@ def load(cls, *args, **kwargs): def _log_progress(self, job_queue, progress_queue, cur_epoch, example_count, total_examples, raw_word_count, total_words, trained_word_count, elapsed): + """Callback used to log progress for long running jobs. + + Parameters + ---------- + job_queue : Queue of (list of object, dict of (str, float)) + The queue of jobs still to be performed by workers. Each job is represented as a tuple containing + the batch of data to be processed and the parameters to be used for the processing as a dict. 
+ progress_queue : Queue of (int, int, int) + A queue of progress reports. Each report is represented as a tuple of these 3 elements: + * size of data chunk processed, for example number of sentences in the corpus chunk. + * Effective word count used in training (after ignoring unknown words and trimming the sentence length). + * Total word count used in training. + cur_epoch : int + The current training iteration through the corpus. + example_count : int + Number of examples (could be sentences for example) processed until now. + total_examples : int + Number of all examples present in the input corpus. + raw_word_count : int + Number of words used in training until now. + total_words : int + Number of all words in the input corpus. + trained_word_count : int + Number of effective words used in training until now (after ignoring unknown words and trimming + the sentence length). + elapsed : int + Elapsed time since the beginning of training in seconds. + + """ if total_examples: # examples-based progress % logger.info( @@ -659,6 +1158,31 @@ def _log_progress(self, job_queue, progress_queue, cur_epoch, example_count, tot def _log_epoch_end(self, cur_epoch, example_count, total_examples, raw_word_count, total_words, trained_word_count, elapsed): + """Callback used to log the end of a training epoch. + + Parameters + ---------- + cur_epoch : int + The current training iteration through the corpus. + example_count : int + Number of examples (could be sentences for example) processed until now. + total_examples : int + Number of all examples present in the input corpus. + raw_word_count : int + Number of words used in training until now. + total_words : int + Number of all words in the input corpus. + trained_word_count : int + Number of effective words used in training until now (after ignoring unknown words and trimming + the sentence length). + elapsed : int + Elapsed time since the beginning of training in seconds. 
+ + Warnings + -------- + In case the corpus is changed while the epoch was running. + + """ logger.info( "EPOCH - %i : training on %i raw words (%i effective words) took %.1fs, %.0f effective words/s", cur_epoch + 1, raw_word_count, trained_word_count, elapsed, trained_word_count / elapsed @@ -677,6 +1201,20 @@ def _log_epoch_end(self, cur_epoch, example_count, total_examples, raw_word_coun ) def _log_train_end(self, raw_word_count, trained_word_count, total_elapsed, job_tally): + """Callback to log the end of training. + + Parameters + ---------- + raw_word_count : int + Number of words used in the whole training. + trained_word_count : int + Number of effective words used in training (after ignoring unknown words and trimming the sentence length). + total_elapsed : int + Total time spent during training in seconds. + job_tally : int + Total number of jobs processed during training. + + """ logger.info( "training on a %i raw words (%i effective words) took %.1fs, %.0f effective words/s", raw_word_count, trained_word_count, total_elapsed, trained_word_count / total_elapsed @@ -689,73 +1227,84 @@ def _log_train_end(self, raw_word_count, trained_word_count, total_elapsed, job_ # for backward compatibility @deprecated("Method will be removed in 4.0.0, use self.wv.most_similar() instead") def most_similar(self, positive=None, negative=None, topn=10, restrict_vocab=None, indexer=None): - """ - Deprecated. Use self.wv.most_similar() instead. - Refer to the documentation for `gensim.models.keyedvectors.WordEmbeddingsKeyedVectors.most_similar` + """Deprecated, use self.wv.most_similar() instead. + + Refer to the documentation for :meth:`~gensim.models.keyedvectors.WordEmbeddingsKeyedVectors.most_similar`. + """ return self.wv.most_similar(positive, negative, topn, restrict_vocab, indexer) @deprecated("Method will be removed in 4.0.0, use self.wv.wmdistance() instead") def wmdistance(self, document1, document2): - """ - Deprecated. Use self.wv.wmdistance() instead. 
- Refer to the documentation for `gensim.models.keyedvectors.WordEmbeddingsKeyedVectors.wmdistance` + """Deprecated, use self.wv.wmdistance() instead. + + Refer to the documentation for :meth:`~gensim.models.keyedvectors.WordEmbeddingsKeyedVectors.wmdistance`. + """ return self.wv.wmdistance(document1, document2) @deprecated("Method will be removed in 4.0.0, use self.wv.most_similar_cosmul() instead") def most_similar_cosmul(self, positive=None, negative=None, topn=10): - """ - Deprecated. Use self.wv.most_similar_cosmul() instead. - Refer to the documentation for `gensim.models.keyedvectors.WordEmbeddingsKeyedVectors.most_similar_cosmul` + """Deprecated, use self.wv.most_similar_cosmul() instead. + + Refer to the documentation for + :meth:`~gensim.models.keyedvectors.WordEmbeddingsKeyedVectors.most_similar_cosmul`. + """ return self.wv.most_similar_cosmul(positive, negative, topn) @deprecated("Method will be removed in 4.0.0, use self.wv.similar_by_word() instead") def similar_by_word(self, word, topn=10, restrict_vocab=None): - """ - Deprecated. Use self.wv.similar_by_word() instead. - Refer to the documentation for `gensim.models.keyedvectors.WordEmbeddingsKeyedVectors.similar_by_word` + """Deprecated, use self.wv.similar_by_word() instead. + + Refer to the documentation for :meth:`~gensim.models.keyedvectors.WordEmbeddingsKeyedVectors.similar_by_word`. + """ return self.wv.similar_by_word(word, topn, restrict_vocab) @deprecated("Method will be removed in 4.0.0, use self.wv.similar_by_vector() instead") def similar_by_vector(self, vector, topn=10, restrict_vocab=None): - """ - Deprecated. Use self.wv.similar_by_vector() instead. - Refer to the documentation for `gensim.models.keyedvectors.WordEmbeddingsKeyedVectors.similar_by_vector` + """Deprecated, use self.wv.similar_by_vector() instead. + + Refer to the documentation for :meth:`~gensim.models.keyedvectors.WordEmbeddingsKeyedVectors.similar_by_vector`. 
+ """ return self.wv.similar_by_vector(vector, topn, restrict_vocab) @deprecated("Method will be removed in 4.0.0, use self.wv.doesnt_match() instead") def doesnt_match(self, words): - """ - Deprecated. Use self.wv.doesnt_match() instead. - Refer to the documentation for `gensim.models.keyedvectors.WordEmbeddingsKeyedVectors.doesnt_match` + """Deprecated, use self.wv.doesnt_match() instead. + + Refer to the documentation for :meth:`~gensim.models.keyedvectors.WordEmbeddingsKeyedVectors.doesnt_match`. + """ return self.wv.doesnt_match(words) @deprecated("Method will be removed in 4.0.0, use self.wv.similarity() instead") def similarity(self, w1, w2): - """ - Deprecated. Use self.wv.similarity() instead. - Refer to the documentation for `gensim.models.keyedvectors.WordEmbeddingsKeyedVectors.similarity` + """Deprecated, use self.wv.similarity() instead. + + Refer to the documentation for :meth:`~gensim.models.keyedvectors.WordEmbeddingsKeyedVectors.similarity`. + """ return self.wv.similarity(w1, w2) @deprecated("Method will be removed in 4.0.0, use self.wv.n_similarity() instead") def n_similarity(self, ws1, ws2): - """ - Deprecated. Use self.wv.n_similarity() instead. - Refer to the documentation for `gensim.models.keyedvectors.WordEmbeddingsKeyedVectors.n_similarity` + """Deprecated, use self.wv.n_similarity() instead. + + Refer to the documentation for :meth:`~gensim.models.keyedvectors.WordEmbeddingsKeyedVectors.n_similarity`. + """ return self.wv.n_similarity(ws1, ws2) @deprecated("Method will be removed in 4.0.0, use self.wv.evaluate_word_pairs() instead") def evaluate_word_pairs(self, pairs, delimiter='\t', restrict_vocab=300000, case_insensitive=True, dummy4unknown=False): - """ - Deprecated. Use self.wv.evaluate_word_pairs() instead. - Refer to the documentation for `gensim.models.keyedvectors.WordEmbeddingsKeyedVectors.evaluate_word_pairs` + """Deprecated, use self.wv.evaluate_word_pairs() instead. 
+ + Refer to the documentation for + :meth:`~gensim.models.keyedvectors.WordEmbeddingsKeyedVectors.evaluate_word_pairs`. + """ return self.wv.evaluate_word_pairs(pairs, delimiter, restrict_vocab, case_insensitive, dummy4unknown) diff --git a/gensim/models/doc2vec.py b/gensim/models/doc2vec.py index f57694273d..c4eb6c6a4a 100644 --- a/gensim/models/doc2vec.py +++ b/gensim/models/doc2vec.py @@ -6,45 +6,47 @@ # Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html -""" -Deep learning via the distributed memory and distributed bag of words models from -[1]_, using either hierarchical softmax or negative sampling [2]_ [3]_. See [#tutorial]_ +"""Deep learning via the distributed memory and distributed bag of words models from +`Quoc Le and Tomas Mikolov: "Distributed Representations of Sentences and Documents" +`_, using either hierarchical softmax or negative sampling, see +`Tomas Mikolov, Kai Chen, Greg Corrado, and Jeffrey Dean: "Efficient Estimation of Word Representations in +Vector Space, in Proceedings of Workshop at ICLR, 2013" `_ and +`Tomas Mikolov, Ilya Sutskever, Kai Chen, Greg Corrado, and Jeffrey Dean: "Distributed Representations of Words +and Phrases and their Compositionality. In Proceedings of NIPS, 2013" +`_. + +For a real world usage scenario, see the `Doc2vec in gensim tutorial +`_. **Make sure you have a C compiler before installing gensim, to use optimized (compiled) -doc2vec training** (70x speedup [blog]_). - -Initialize a model with e.g.:: - ->>> model = Doc2Vec(documents, size=100, window=8, min_count=5, workers=4) - -Persist a model to disk with:: +doc2vec training** (70x speedup `blog `_). ->>> model.save(fname) ->>> model = Doc2Vec.load(fname) # you can continue training with the loaded model! 
+Examples +-------- -If you're finished training a model (=no more updates, only querying), you can do +Initialize & train a model - >>> model.delete_temporary_training_data(keep_doctags_vectors=True, keep_inference=True): +>>> from gensim.test.utils import common_texts, get_tmpfile +>>> from gensim.models.doc2vec import Doc2Vec, TaggedDocument +>>> +>>> documents = [TaggedDocument(word, [i]) for i, word in enumerate(common_texts)] +>>> model = Doc2Vec(documents, vector_size=5, window=2, min_count=1, workers=4) -to trim unneeded model memory = use (much) less RAM. +Persist a model to disk +>>> tmp_f = get_tmpfile("model") +>>> model.save(tmp_f) +>>> model = Doc2Vec.load(tmp_f) # you can continue training with the loaded model! +If you're finished training a model (=no more updates, only querying, reduce memory usage), you can do -.. [1] Quoc Le and Tomas Mikolov. Distributed Representations of Sentences and Documents. - http://arxiv.org/pdf/1405.4053v2.pdf -.. [2] Tomas Mikolov, Kai Chen, Greg Corrado, and Jeffrey Dean. - Efficient Estimation of Word Representations in Vector Space. In Proceedings of Workshop at ICLR, 2013. -.. [3] Tomas Mikolov, Ilya Sutskever, Kai Chen, Greg Corrado, and Jeffrey Dean. - Distributed Representations of Words and Phrases and their Compositionality. In Proceedings of NIPS, 2013. -.. [blog] Optimizing word2vec in gensim, http://radimrehurek.com/2013/09/word2vec-in-python-part-two-optimizing/ - -.. 
[#tutorial] Doc2vec in gensim tutorial, - https://github.com/RaRe-Technologies/gensim/blob/develop/docs/notebooks/doc2vec-lee.ipynb +>>> model.delete_temporary_training_data(keep_doctags_vectors=True, keep_inference=True) +Infer vector for new document +>>> vector = model.infer_vector(["system", "response"]) """ - import logging import os import warnings @@ -85,20 +87,53 @@ def train_document_dbow(model, doc_words, doctag_indexes, alpha, work=None, train_words=False, learn_doctags=True, learn_words=True, learn_hidden=True, word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): - """ - Update distributed bag of words model ("PV-DBOW") by training on a single document. - Called internally from `Doc2Vec.train()` and `Doc2Vec.infer_vector()`. - The document is provided as `doc_words`, a list of word tokens which are looked up - in the model's vocab dictionary, and `doctag_indexes`, which provide indexes - into the doctag_vectors array. - If `train_words` is True, simultaneously train word-to-word (not just doc-to-word) - examples, exactly as per Word2Vec skip-gram training. (Without this option, - word vectors are neither consulted nor updated during DBOW doc vector training.) - Any of `learn_doctags', `learn_words`, and `learn_hidden` may be set False to - prevent learning-updates to those respective model weights, as if using the - (partially-)frozen model to infer other compatible vectors. + """Update distributed bag of words model ("PV-DBOW") by training on a single document. + + Called internally from :meth:`~gensim.models.doc2vec.Doc2Vec.train` and + :meth:`~gensim.models.doc2vec.Doc2Vec.infer_vector`. + + Notes + ----- This is the non-optimized, Python version. If you have cython installed, gensim - will use the optimized version from doc2vec_inner instead. + will use the optimized version from :mod:`gensim.models.doc2vec_inner` instead. + + Parameters + ---------- + model : :class:`~gensim.models.doc2vec.Doc2Vec` + The model to train. 
+ doc_words : list of str + The input document as a list of words to be used for training. Each word will be looked up in + the model's vocabulary. + doctag_indexes : list of int + Indices into `doctag_vectors` used to obtain the tags of the document. + alpha : float + Learning rate. + work : np.ndarray + Private working memory for each worker. + train_words : bool, optional + Word vectors will be updated exactly as per Word2Vec skip-gram training only if **both** + `learn_words` and `train_words` are set to True. + learn_doctags : bool, optional + Whether the tag vectors should be updated. + learn_words : bool, optional + Word vectors will be updated exactly as per Word2Vec skip-gram training only if **both** + `learn_words` and `train_words` are set to True. + learn_hidden : bool, optional + Whether or not the weights of the hidden layer will be updated. + word_vectors : object, optional + UNUSED. + word_locks : object, optional + UNUSED. + doctag_vectors : list of list of float, optional + Vector representations of the tags. If None, these will be retrieved from the model. + doctag_locks : list of float, optional + The lock factors for each tag. + + Returns + ------- + int + Number of words in the input document. + """ if doctag_vectors is None: doctag_vectors = model.docvecs.doctag_syn0 @@ -119,21 +154,55 @@ def train_document_dbow(model, doc_words, doctag_indexes, alpha, work=None, def train_document_dm(model, doc_words, doctag_indexes, alpha, work=None, neu1=None, learn_doctags=True, learn_words=True, learn_hidden=True, word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): - """ - Update distributed memory model ("PV-DM") by training on a single document. - Called internally from `Doc2Vec.train()` and `Doc2Vec.infer_vector()`. This - method implements the DM model with a projection (input) layer that is - either the sum or mean of the context vectors, depending on the model's - `dm_mean` configuration field. 
See `train_document_dm_concat()` for the DM - model with a concatenated input layer. - The document is provided as `doc_words`, a list of word tokens which are looked up - in the model's vocab dictionary, and `doctag_indexes`, which provide indexes - into the doctag_vectors array. - Any of `learn_doctags', `learn_words`, and `learn_hidden` may be set False to - prevent learning-updates to those respective model weights, as if using the - (partially-)frozen model to infer other compatible vectors. - This is the non-optimized, Python version. If you have a C compiler, gensim - will use the optimized version from doc2vec_inner instead. + """Update distributed memory model ("PV-DM") by training on a single document. + + Called internally from :meth:`~gensim.models.doc2vec.Doc2Vec.train` and + :meth:`~gensim.models.doc2vec.Doc2Vec.infer_vector`. This method implements + the DM model with a projection (input) layer that is either the sum or mean of + the context vectors, depending on the model's `dm_mean` configuration field. + + Notes + ----- + This is the non-optimized, Python version. If you have cython installed, gensim + will use the optimized version from :mod:`gensim.models.doc2vec_inner` instead. + + Parameters + ---------- + model : :class:`~gensim.models.doc2vec.Doc2Vec` + The model to train. + doc_words : list of str + The input document as a list of words to be used for training. Each word will be looked up in + the model's vocabulary. + doctag_indexes : list of int + Indices into `doctag_vectors` used to obtain the tags of the document. + alpha : float + Learning rate. + work : object + UNUSED. + neu1 : object + UNUSED. + learn_doctags : bool, optional + Whether the tag vectors should be updated. + learn_words : bool, optional + Word vectors will be updated exactly as per Word2Vec skip-gram training only if **both** + `learn_words` and `train_words` are set to True. 
+ learn_hidden : bool, optional + Whether or not the weights of the hidden layer will be updated. + word_vectors : iterable of list of float, optional + Vector representations of each word in the model's vocabulary. + word_locks : list of float, optional + Lock factors for each word in the vocabulary. + doctag_vectors : list of list of float, optional + Vector representations of the tags. If None, these will be retrieved from the model. + doctag_locks : list of float, optional + The lock factors for each tag. + + Returns + ------- + int + Number of words in the input document that were actually used for training (they were found in the + vocabulary and they were not discarded by negative sampling). + """ if word_vectors is None: word_vectors = model.wv.syn0 @@ -172,18 +241,55 @@ def train_document_dm(model, doc_words, doctag_indexes, alpha, work=None, neu1=N def train_document_dm_concat(model, doc_words, doctag_indexes, alpha, work=None, neu1=None, learn_doctags=True, learn_words=True, learn_hidden=True, word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): - """ - Update distributed memory model ("PV-DM") by training on a single document, using a - concatenation of the context window word vectors (rather than a sum or average). - Called internally from `Doc2Vec.train()` and `Doc2Vec.infer_vector()`. - The document is provided as `doc_words`, a list of word tokens which are looked up - in the model's vocab dictionary, and `doctag_indexes`, which provide indexes - into the doctag_vectors array. - Any of `learn_doctags', `learn_words`, and `learn_hidden` may be set False to - prevent learning-updates to those respective model weights, as if using the - (partially-)frozen model to infer other compatible vectors. - This is the non-optimized, Python version. If you have a C compiler, gensim - will use the optimized version from doc2vec_inner instead. 
+ """Update distributed memory model ("PV-DM") by training on a single document, using a + concatenation of the context window word vectors (rather than a sum or average). This + might be slower since the input at each batch will be significantly larger. + + Called internally from :meth:`~gensim.models.doc2vec.Doc2Vec.train` and + :meth:`~gensim.models.doc2vec.Doc2Vec.infer_vector`. + + Notes + ----- + This is the non-optimized, Python version. If you have cython installed, gensim + will use the optimized version from :mod:`gensim.models.doc2vec_inner` instead. + + Parameters + ---------- + model : :class:`~gensim.models.doc2vec.Doc2Vec` + The model to train. + doc_words : list of str + The input document as a list of words to be used for training. Each word will be looked up in + the model's vocabulary. + doctag_indexes : list of int + Indices into `doctag_vectors` used to obtain the tags of the document. + alpha : float + Learning rate. + work : object + UNUSED. + neu1 : object + UNUSED. + learn_doctags : bool, optional + Whether the tag vectors should be updated. + learn_words : bool, optional + Word vectors will be updated exactly as per Word2Vec skip-gram training only if **both** + `learn_words` and `train_words` are set to True. + learn_hidden : bool, optional + Whether or not the weights of the hidden layer will be updated. + word_vectors : iterable of list of float, optional + Vector representations of each word in the model's vocabulary. + word_locks : list of float, optional + Lock factors for each word in the vocabulary. + doctag_vectors : list of list of float, optional + Vector representations of the tags. If None, these will be retrieved from the model. + doctag_locks : list of float, optional + The lock factors for each tag. + + Returns + ------- + int + Number of words in the input document that were actually used for training (they were found in the + vocabulary and they were not discarded by negative sampling).
+ """ if word_vectors is None: word_vectors = model.wv.syn0 @@ -234,36 +340,44 @@ def train_document_dm_concat(model, doc_words, doctag_indexes, alpha, work=None, class TaggedDocument(namedtuple('TaggedDocument', 'words tags')): - """ - A single document, made up of `words` (a list of unicode string tokens) - and `tags` (a list of tokens). Tags may be one or more unicode string - tokens, but typical practice (which will also be most memory-efficient) is - for the tags list to include a unique integer id as the only tag. + """Represents a document along with a tag, input document format for :class:`~gensim.models.doc2vec.Doc2Vec`. - Replaces "sentence as a list of words" from Word2Vec. + A single document, made up of `words` (a list of unicode string tokens) and `tags` (a list of tokens). + Tags may be one or more unicode string tokens, but typical practice (which will also be most memory-efficient) + is for the tags list to include a unique integer id as the only tag. - """ + Replaces "sentence as a list of words" from :class:`gensim.models.word2vec.Word2Vec`. + """ def __str__(self): + """Human readable representation of the object's state, used for debugging. + + Returns + ------- + str + Human readable representation of the object's state (words and tags). + + """ return '%s(%s, %s)' % (self.__class__.__name__, self.words, self.tags) # for compatibility @deprecated("Class will be removed in 4.0.0, use TaggedDocument instead") class LabeledSentence(TaggedDocument): + """Deprecated, use :class:`~gensim.models.doc2vec.TaggedDocument` instead.""" pass class Doctag(namedtuple('Doctag', 'offset, word_count, doc_count')): - """A string document tag discovered during the initial vocabulary - scan. (The document-vector equivalent of a Vocab object.) + """A string document tag discovered during the initial vocabulary scan. + The document-vector equivalent of a Vocab object. Will not be used if all presented document tags are ints. 
- The offset is only the true index into the doctags_syn0/doctags_syn0_lockf - if-and-only-if no raw-int tags were used. If any raw-int tags were used, - string Doctag vectors begin at index (max_rawint + 1), so the true index is - (rawint_index + 1 + offset). See also _index_to_doctag(). + The offset is only the true index into the doctags_syn0/doctags_syn0_lockf if-and-only-if no raw-int tags were used. + If any raw-int tags were used, string Doctag vectors begin at index (max_rawint + 1), so the true index is + (rawint_index + 1 + offset), see also :meth:`~gensim.models.keyedvectors.Doc2VecKeyedVectors._index_to_doctag`. + """ __slots__ = () @@ -272,86 +386,123 @@ def repeat(self, word_count): class Doc2Vec(BaseWordEmbeddingsModel): - """Class for training, using and evaluating neural networks described in http://arxiv.org/pdf/1405.4053v2.pdf""" + """Class for training, using and evaluating neural networks described in + `Distributed Representations of Sentences and Documents `_. + + Some important internal attributes are the following: + + Attributes + ---------- + wv : :class:`~gensim.models.keyedvectors.Word2VecKeyedVectors` + This object essentially contains the mapping between words and embeddings. After training, it can be used + directly to query those embeddings in various ways. See the module level docstring for examples. + + docvecs : :class:`~gensim.models.keyedvectors.Doc2VecKeyedVectors` + This object contains the paragraph vectors. Remember that the only difference between this model and + :class:`~gensim.models.word2vec.Word2Vec` is that besides the word vectors we also include paragraph embeddings + to capture the paragraph. + In this way we can capture the difference between the same word used in a different wide context. + For example we now have a different representation of the word "leaves" in the following two sentences :: + + 1. Manos leaves the office every day at 18:00 to catch his train + 2. 
This season is called Fall, because leaves fall from the trees. + + In a plain :class:`~gensim.models.word2vec.Word2Vec` model the word would have exactly the same representation + in both sentences, in :class:`~gensim.models.doc2vec.Doc2Vec` it will not. + + vocabulary : :class:`~gensim.models.doc2vec.Doc2VecVocab` + This object represents the vocabulary (sometimes called Dictionary in gensim) of the model. + Besides keeping track of all unique words, this object provides extra functionality, such as + sorting words by frequency, or discarding extremely rare words. + + trainables : :class:`~gensim.models.doc2vec.Doc2VecTrainables` + This object represents the inner shallow neural network used to train the embeddings. The semantics of the + network differ slightly in the two available training modes (CBOW or SG) but you can think of it as a NN with + a single projection and hidden layer which we train on the corpus. The weights are then used as our embeddings + The only addition to the underlying NN used in :class:`~gensim.models.word2vec.Word2Vec` is that the input + includes not only the word vectors of each word in the context, but also the paragraph vector. + """ def __init__(self, documents=None, dm_mean=None, dm=1, dbow_words=0, dm_concat=0, dm_tag_count=1, docvecs=None, docvecs_mapfile=None, comment=None, trim_rule=None, callbacks=(), **kwargs): - """Initialize the model from an iterable of `documents`. Each document is a - TaggedDocument object that will be used for training. + """ Parameters ---------- - documents : iterable of iterables - The `documents` iterable can be simply a list of TaggedDocument elements, but for larger corpora, - consider an iterable that streams the documents directly from disk/network. - If you don't supply `documents`, the model is left uninitialized -- use if - you plan to initialize it in some other way. 
- - dm : int {1,0} + documents : iterable of list of :class:`~gensim.models.doc2vec.TaggedDocument`, optional + Input corpus, can be simply a list of elements, but for larger corpora, consider an iterable that streams + the documents directly from disk/network. If you don't supply `documents`, the model is + left uninitialized -- use if you plan to initialize it in some other way. + dm : {1,0}, optional Defines the training algorithm. If `dm=1`, 'distributed memory' (PV-DM) is used. Otherwise, `distributed bag of words` (PV-DBOW) is employed. - - size : int + size : int, optional Dimensionality of the feature vectors. - window : int + window : int, optional The maximum distance between the current and predicted word within a sentence. - alpha : float + alpha : float, optional The initial learning rate. - min_alpha : float + min_alpha : float, optional Learning rate will linearly drop to `min_alpha` as training progresses. - seed : int + seed : int, optional Seed for the random number generator. Initial vectors for each word are seeded with a hash of the concatenation of word + `str(seed)`. Note that for a fully deterministically-reproducible run, you must also limit the model to a single worker thread (`workers=1`), to eliminate ordering jitter - from OS thread scheduling. (In Python 3, reproducibility between interpreter launches also requires - use of the `PYTHONHASHSEED` environment variable to control hash randomization). - min_count : int + from OS thread scheduling. + In Python 3, reproducibility between interpreter launches also requires use of the `PYTHONHASHSEED` + environment variable to control hash randomization. + min_count : int, optional Ignores all words with total frequency lower than this. - max_vocab_size : int + max_vocab_size : int, optional Limits the RAM during vocabulary building; if there are more unique words than this, then prune the infrequent ones. Every 10 million word types need about 1GB of RAM. Set to `None` for no limit.
- sample : float + sample : float, optional The threshold for configuring which higher-frequency words are randomly downsampled, useful range is (0, 1e-5). - workers : int + workers : int, optional Use these many worker threads to train the model (=faster training with multicore machines). - iter : int + iter : int, optional Number of iterations (epochs) over the corpus. - hs : int {1,0} + hs : {1,0}, optional If 1, hierarchical softmax will be used for model training. If set to 0, and `negative` is non-zero, negative sampling will be used. - negative : int + negative : int, optional If > 0, negative sampling will be used, the int for negative specifies how many "noise words" should be drawn (usually between 5-20). If set to 0, no negative sampling is used. - dm_mean : int {1,0} + dm_mean : {1,0}, optional If 0 , use the sum of the context word vectors. If 1, use the mean. Only applies when `dm` is used in non-concatenative mode. - dm_concat : int {1,0} + dm_concat : {1,0}, optional If 1, use concatenation of context vectors rather than sum/average; Note concatenation results in a much-larger model, as the input is no longer the size of one (sampled or arithmetically combined) word vector, but the size of the tag(s) and all words in the context strung together. - dm_tag_count : int + dm_tag_count : int, optional Expected constant number of document tags per document, when using - dm_concat mode; default is 1. - dbow_words : int {1,0} + dm_concat mode. + dbow_words : {1,0}, optional If set to 1 trains word-vectors (in skip-gram fashion) simultaneous with DBOW doc-vector training; If 0, only trains doc-vectors (faster). - trim_rule : function + trim_rule : function, optional Vocabulary trimming rule, specifies whether certain words should remain in the vocabulary, be trimmed away, or handled using the default (discard if word count < min_count). 
Can be None (min_count will be used, look to :func:`~gensim.utils.keep_vocab_item`), or a callable that accepts parameters (word, count, min_count) and returns either :attr:`gensim.utils.RULE_DISCARD`, :attr:`gensim.utils.RULE_KEEP` or :attr:`gensim.utils.RULE_DEFAULT`. - Note: The rule, if given, is only used to prune vocabulary during build_vocab() and is not stored as part + The rule, if given, is only used to prune vocabulary during current method call and is not stored as part of the model. - callbacks : :obj: `list` of :obj: `~gensim.models.callbacks.CallbackAny2Vec` + + The input parameters are of the following types: + * `word` (str) - the word we are examining + * `count` (int) - the word's frequency count in the corpus + * `min_count` (int) - the minimum count threshold. + + callbacks : :obj: `list` of :obj: `~gensim.models.callbacks.CallbackAny2Vec`, optional List of callbacks that need to be executed/run at specific stages during training. """ - if 'sentences' in kwargs: raise DeprecationWarning( "Parameter 'sentences' was renamed to 'documents', and will be removed in 4.0.0, " @@ -407,28 +558,41 @@ def __init__(self, documents=None, dm_mean=None, dm=1, dbow_words=0, dm_concat=0 @property def dm(self): - """int {1,0} : `dm=1` indicates 'distributed memory' (PV-DM) else - `distributed bag of words` (PV-DBOW) is used.""" + """Indicates whether 'distributed memory' (PV-DM) will be used, else 'distributed bag of words' + (PV-DBOW) is used. + + """ return not self.sg # opposite of SG @property def dbow(self): - """int {1,0} : `dbow=1` indicates `distributed bag of words` (PV-DBOW) else - 'distributed memory' (PV-DM) is used.""" + """Indicates whether 'distributed bag of words' (PV-DBOW) will be used, else 'distributed memory' + (PV-DM) is used. 
+ + """ return self.sg # same as SG def _set_train_params(self, **kwargs): pass def _clear_post_train(self): + """Alias for :meth:`~gensim.models.doc2vec.Doc2Vec.clear_sims`.""" self.clear_sims() def clear_sims(self): + """Resets the current word vectors. """ self.wv.vectors_norm = None self.wv.vectors_docs_norm = None def reset_from(self, other_model): - """Reuse shareable structures from other_model.""" + """Copy shareable data structures from another (possibly pre-trained) model. + + Parameters + ---------- + other_model : :class:`~gensim.models.doc2vec.Doc2Vec` + Other model whose internal data structures will be copied over to the current object. + + """ self.wv.vocab = other_model.wv.vocab self.wv.index2word = other_model.wv.index2word self.vocabulary.cum_table = other_model.vocabulary.cum_table @@ -439,6 +603,23 @@ def reset_from(self, other_model): self.trainables.reset_weights(self.hs, self.negative, self.wv, self.docvecs) def _do_train_job(self, job, alpha, inits): + """Train model using `job` data. + + Parameters + ---------- + job : iterable of list of :class:`~gensim.models.doc2vec.TaggedDocument` + The corpus chunk to be used for training this batch. + alpha : float + Learning rate to be used for training this batch. + inits : (np.ndarray, np.ndarray) + Each worker threads private work memory. + + Returns + ------- + (int, int) + 2-tuple (effective word count after ignoring unknown words and sentence length trimming, total word count). + + """ work, neu1 = inits tally = 0 for doc in job: @@ -465,46 +646,45 @@ def _do_train_job(self, job, alpha, inits): def train(self, documents, total_examples=None, total_words=None, epochs=None, start_alpha=None, end_alpha=None, word_count=0, queue_factor=2, report_delay=1.0, callbacks=()): - """Update the model's neural weights from a sequence of sentences (can be a once-only generator stream). - The `documents` iterable can be simply a list of TaggedDocument elements. + """Update the model's neural weights. 
To support linear learning-rate decay from (initial) alpha to min_alpha, and accurate - progress-percentage logging, either total_examples (count of sentences) or total_words (count of - raw words in sentences) **MUST** be provided (if the corpus is the same as was provided to - :meth:`~gensim.models.word2vec.Word2Vec.build_vocab()`, the count of examples in that corpus - will be available in the model's :attr:`corpus_count` property). + progress-percentage logging, either total_examples (count of sentences) or total_words (count of raw words + in sentences) **MUST** be provided (if the corpus is the same as was provided to + :meth:`~gensim.models.word2vec.Word2Vec.build_vocab`, the count of examples in that corpus will be available + in the model's :attr:`corpus_count` property). - To avoid common mistakes around the model's ability to do multiple training passes itself, an - explicit `epochs` argument **MUST** be provided. In the common and recommended case, - where :meth:`~gensim.models.word2vec.Word2Vec.train()` is only called once, - the model's cached `iter` value should be supplied as `epochs` value. + To avoid common mistakes around the model's ability to do multiple training passes itself, an explicit `epochs` + argument **MUST** be provided. In the common and recommended case, + where :meth:`~gensim.models.word2vec.Word2Vec.train` is only called once, the model's cached `iter` value + should be supplied as `epochs` value. Parameters ---------- - documents : iterable of iterables - The `documents` iterable can be simply a list of TaggedDocument elements, but for larger corpora, - consider an iterable that streams the documents directly from disk/network. - See :class:`~gensim.models.doc2vec.TaggedBrownCorpus` or :class:`~gensim.models.doc2vec.TaggedLineDocument` - in :mod:`~gensim.models.doc2vec` module for such examples. 
- total_examples : int + documents : iterable of list of :class:`~gensim.models.doc2vec.TaggedDocument` + Can be simply a list of elements, but for larger corpora, consider an iterable that streams + the documents directly from disk/network. If you don't supply `documents`, the model is + left uninitialized -- use if you plan to initialize it in some other way. + total_examples : int, optional Count of sentences. - total_words : int + total_words : int, optional Count of raw words in documents. - epochs : int + epochs : int, optional Number of iterations (epochs) over the corpus. - start_alpha : float + start_alpha : float, optional Initial learning rate. - end_alpha : float + end_alpha : float, optional Final learning rate. Drops linearly from `start_alpha`. - word_count : int + word_count : int, optional Count of words already trained. Set this to 0 for the usual case of training on all words in sentences. - queue_factor : int + queue_factor : int, optional Multiplier for size of queue (number of workers * queue_factor). - report_delay : float + report_delay : float, optional Seconds to wait before reporting progress. - callbacks : :obj: `list` of :obj: `~gensim.models.callbacks.CallbackAny2Vec` + callbacks : :obj: `list` of :obj: `~gensim.models.callbacks.CallbackAny2Vec`, optional List of callbacks that need to be executed/run at specific stages during training. + """ super(Doc2Vec, self).train( documents, total_examples=total_examples, total_words=total_words, @@ -512,32 +692,56 @@ def train(self, documents, total_examples=None, total_words=None, queue_factor=queue_factor, report_delay=report_delay, callbacks=callbacks) def _raw_word_count(self, job): - """Return the number of words in a given job.""" + """Get the number of words in a given job. + + Parameters + ---------- + job : iterable of list of :class:`~gensim.models.doc2vec.TaggedDocument` + Corpus chunk. + + Returns + ------- + int + Number of raw words in the corpus chunk. 
+ + """ return sum(len(sentence.words) for sentence in job) def estimated_lookup_memory(self): - """Estimated memory for tag lookup; 0 if using pure int tags.""" + """Get estimated memory for tag lookup, 0 if using pure int tags. + + Returns + ------- + int + The estimated RAM required to look up a tag in bytes. + + """ return 60 * len(self.docvecs.offset2doctag) + 140 * len(self.docvecs.doctags) def infer_vector(self, doc_words, alpha=0.1, min_alpha=0.0001, steps=5): - """ - Infer a vector for given post-bulk training document. + """Infer a vector for given post-bulk training document. + + Notes + ----- + Subsequent calls to this function may infer different representations for the same document. + For a more stable representation, increase the number of steps to assert a stricter convergence. Parameters ---------- - doc_words : :obj: `list` of :obj: `str` - Document should be a list of (word) tokens. - alpha : float + doc_words : list of str + A document for which the vector representation will be inferred. + alpha : float, optional The initial learning rate. - min_alpha : float + min_alpha : float, optional Learning rate will linearly drop to `min_alpha` as training progresses. - steps : int - Number of times to train the new document. + steps : int, optional + Number of times to train the new document. A higher value may slow down training, but it will result in more + stable representations. Returns ------- - :obj: `numpy.ndarray` - Returns the inferred vector for the new document. + np.ndarray + The inferred paragraph vector for the new document. """ doctag_vectors, doctag_locks = self.trainables.get_doctag_trainables(doc_words, self.docvecs.vector_size) @@ -567,6 +771,19 @@ def infer_vector(self, doc_words, alpha=0.1, min_alpha=0.0001, steps=5): return doctag_vectors[0] def __getitem__(self, tag): + """Get the vector representation of (possibly multi-term) tag. 
+ + Parameters + ---------- + tag : {str, int, list of str, list of int} + The tag (or tags) to be looked up in the model. + + Returns + ------- + np.ndarray + The vector representations of each tag as a matrix (will be 1D if `tag` was a single tag) + + """ if isinstance(tag, string_types + integer_types + (integer,)): if tag not in self.wv.vocab: return self.docvecs[tag] @@ -574,7 +791,14 @@ def __getitem__(self, tag): return vstack([self[i] for i in tag]) def __str__(self): - """Abbreviated name reflecting major configuration paramaters.""" + """Abbreviated name reflecting major configuration parameters. + + Returns + ------- + str + Human readable representation of the models internal state. + + """ segments = [] if self.comment: segments.append('"%s"' % self.comment) @@ -612,11 +836,13 @@ def delete_temporary_training_data(self, keep_doctags_vectors=True, keep_inferen Parameters ---------- - keep_doctags_vectors : bool - Set `keep_doctags_vectors` to False if you don't want to save doctags vectors, - in this case you can't to use docvecs's most_similar, similarity etc. methods. - keep_inference : bool - Set `keep_inference` to False if you don't want to store parameters that is used for infer_vector method + keep_doctags_vectors : bool, optional + Set to False if you don't want to save doctags vectors. In this case you will not be able to use + :meth:`~gensim.models.keyedvectors.Doc2VecKeyedVectors.most_similar`, + :meth:`~gensim.models.keyedvectors.Doc2VecKeyedVectors.similarity`, etc methods. + keep_inference : bool, optional + Set to False if you don't want to store parameters that are used for + :meth:`~gensim.models.doc2vec.Doc2Vec.infer_vector` method. 
""" if not keep_inference: @@ -633,24 +859,23 @@ def delete_temporary_training_data(self, keep_doctags_vectors=True, keep_inferen del self.trainables.vectors_docs_lockf def save_word2vec_format(self, fname, doctag_vec=False, word_vec=True, prefix='*dt_', fvocab=None, binary=False): - """Store the input-hidden weight matrix in the same format used by the original - C word2vec-tool, for compatibility. + """Store the input-hidden weight matrix in the same format used by the original C word2vec-tool. Parameters ---------- fname : str The file path used to save the vectors in. - doctag_vec : bool + doctag_vec : bool, optional Indicates whether to store document vectors. - word_vec : bool + word_vec : bool, optional Indicates whether to store word vectors. - prefix : str - Uniquely identifies doctags from word vocab, and avoids collision - in case of repeated string in doctag and word vocab. - fvocab : str - Optional file path used to save the vocabulary - binary : bool - If True, the data wil be saved in binary word2vec format, else it will be saved in plain text. + prefix : str, optional + Uniquely identifies doctags from word vocab, and avoids collision in case of repeated string in doctag + and word vocab. + fvocab : str, optional + Optional file path used to save the vocabulary. + binary : bool, optional + If True, the data wil be saved in binary word2vec format, otherwise - will be saved in plain text. """ total_vec = len(self.wv.vocab) + len(self.docvecs) @@ -670,21 +895,41 @@ def save_word2vec_format(self, fname, doctag_vec=False, word_vec=True, prefix='* binary=binary, write_first_line=write_first_line) def init_sims(self, replace=False): - """ - Precompute L2-normalized vectors. - - If `replace` is set, forget the original vectors and only keep the normalized - ones = saves lots of memory! + """Pre-compute L2-normalized vectors. - Note that you **cannot continue training or inference** after doing a replace. 
- The model becomes effectively read-only = you can call `most_similar`, `similarity` - etc., but not `train` or `infer_vector`. + Parameters + ---------- + replace : bool + If True - forget the original vectors and only keep the normalized ones to save RAM (also, you can't + continue training if you call it with `replace=True`). """ - return self.docvecs.init_sims(replace=replace) + self.docvecs.init_sims(replace=replace) @classmethod def load(cls, *args, **kwargs): + """Loads a previously saved :class:`~gensim.models.doc2vec.Doc2Vec` model. + + Parameters + ---------- + fname : str + Path to the saved file. + *args : object + Additional arguments, see `~gensim.models.base_any2vec.BaseWordEmbeddingsModel.load`. + **kwargs : object + Additional arguments, see `~gensim.models.base_any2vec.BaseWordEmbeddingsModel.load`. + + See Also + -------- + :meth:`~gensim.models.doc2vec.Doc2Vec.save` + Save :class:`~gensim.models.doc2vec.Doc2Vec` model. + + Returns + ------- + :class:`~gensim.models.doc2vec.Doc2Vec` + Loaded model. + + """ try: return super(Doc2Vec, cls).load(*args, **kwargs) except AttributeError: @@ -693,7 +938,23 @@ def load(cls, *args, **kwargs): return load_old_doc2vec(*args, **kwargs) def estimate_memory(self, vocab_size=None, report=None): - """Estimate required memory for a model using current settings.""" + """Estimate required memory for a model using current settings. + + Parameters + ---------- + vocab_size : int, optional + Number of raw words in the vocabulary. + report : dict of (str, int), optional + A dictionary from string representations of the **specific** model's memory consuming members + to their size in bytes. + + Returns + ------- + dict of (str, int), optional + A dictionary from string representations of the model's memory consuming members to their size in bytes. + Includes members from the base classes as well as weights and tag lookup memory estimation specific to the + class. 
+ """ report = report or {} report['doctag_lookup'] = self.estimated_lookup_memory() report['doctag_syn0'] = self.docvecs.count * self.vector_size * dtype(REAL).itemsize @@ -701,29 +962,36 @@ def estimate_memory(self, vocab_size=None, report=None): def build_vocab(self, documents, update=False, progress_per=10000, keep_raw_vocab=False, trim_rule=None, **kwargs): """Build vocabulary from a sequence of sentences (can be a once-only generator stream). - Each sentence is a iterable of iterables (can simply be a list of unicode strings too). Parameters ---------- - documents : iterable of iterables - The `documents` iterable can be simply a list of TaggedDocument elements, but for larger corpora, + documents : iterable of list of :class:`~gensim.models.doc2vec.TaggedDocument` + Can be simply a list of :class:`~gensim.models.doc2vec.TaggedDocument` elements, but for larger corpora, consider an iterable that streams the documents directly from disk/network. See :class:`~gensim.models.doc2vec.TaggedBrownCorpus` or :class:`~gensim.models.doc2vec.TaggedLineDocument` - in :mod:`~gensim.models.doc2vec` module for such examples. + update : bool + If true, the new words in `sentences` will be added to model's vocab. + progress_per : int + Indicates how many words to process before showing/updating the progress. keep_raw_vocab : bool If not true, delete the raw vocabulary after the scaling is done and free up RAM. - trim_rule : function + trim_rule : function, optional Vocabulary trimming rule, specifies whether certain words should remain in the vocabulary, be trimmed away, or handled using the default (discard if word count < min_count). Can be None (min_count will be used, look to :func:`~gensim.utils.keep_vocab_item`), or a callable that accepts parameters (word, count, min_count) and returns either :attr:`gensim.utils.RULE_DISCARD`, :attr:`gensim.utils.RULE_KEEP` or :attr:`gensim.utils.RULE_DEFAULT`. 
- Note: The rule, if given, is only used to prune vocabulary during build_vocab() and is not stored as part + The rule, if given, is only used to prune vocabulary during current method call and is not stored as part of the model. - progress_per : int - Indicates how many words to process before showing/updating the progress. - update : bool - If true, the new words in `sentences` will be added to model's vocab. + + The input parameters are of the following types: + * `word` (str) - the word we are examining + * `count` (int) - the word's frequency count in the corpus + * `min_count` (int) - the minimum count threshold. + + **kwargs + Additional key word arguments passed to the internal vocabulary construction. + """ total_words, corpus_count = self.vocabulary.scan_vocab( documents, self.docvecs, progress_per=progress_per, trim_rule=trim_rule) @@ -737,35 +1005,36 @@ def build_vocab(self, documents, update=False, progress_per=10000, keep_raw_voca self.hs, self.negative, self.wv, self.docvecs, update=update) def build_vocab_from_freq(self, word_freq, keep_raw_vocab=False, corpus_count=None, trim_rule=None, update=False): - """ - Build vocabulary from a dictionary of word frequencies. - Build model vocabulary from a passed dictionary that contains (word,word count). + """Build vocabulary from a dictionary of word frequencies. + + Build model vocabulary from a passed dictionary that contains a (word -> word count) mapping. Words must be of type unicode strings. Parameters ---------- - word_freq : dict - Word,Word_Count dictionary. - keep_raw_vocab : bool + word_freq : dict of (str, int) + Word <-> count mapping. + keep_raw_vocab : bool, optional If not true, delete the raw vocabulary after the scaling is done and free up RAM. - corpus_count : int + corpus_count : int, optional Even if no corpus is provided, this argument can set corpus_count explicitly. 
- trim_rule : function + trim_rule : function, optional Vocabulary trimming rule, specifies whether certain words should remain in the vocabulary, be trimmed away, or handled using the default (discard if word count < min_count). Can be None (min_count will be used, look to :func:`~gensim.utils.keep_vocab_item`), or a callable that accepts parameters (word, count, min_count) and returns either :attr:`gensim.utils.RULE_DISCARD`, :attr:`gensim.utils.RULE_KEEP` or :attr:`gensim.utils.RULE_DEFAULT`. - Note: The rule, if given, is only used to prune vocabulary during build_vocab() and is not stored as part - of the model. - update : bool + The rule, if given, is only used to prune vocabulary during build_vocab() and is not stored as part of the + model. + + The input parameters are of the following types: + * `word` (str) - the word we are examining + * `count` (int) - the word's frequency count in the corpus + * `min_count` (int) - the minimum count threshold. + + update : bool, optional If true, the new provided words in `word_freq` dict will be added to model's vocab. - Examples - -------- - >>> from gensim.models.word2vec import Word2Vec - >>> model= Word2Vec() - >>> model.build_vocab_from_freq({"Word1": 15, "Word2": 20}) """ logger.info("Processing provided word frequencies") # Instead of scanning text, this will assign provided word frequencies dictionary(word_freq) @@ -790,12 +1059,67 @@ def build_vocab_from_freq(self, word_freq, keep_raw_vocab=False, corpus_count=No class Doc2VecVocab(Word2VecVocab): + """Vocabulary used by :class:`~gensim.models.doc2vec.Doc2Vec`. + + This includes a mapping from words found in the corpus to their total frequency count. + + """ def __init__(self, max_vocab_size=None, min_count=5, sample=1e-3, sorted_vocab=True, null_word=0): + """ + + Parameters + ---------- + max_vocab_size : int, optional + Maximum number of words in the Vocabulary. 
Used to limit the RAM during vocabulary building; + if there are more unique words than this, then prune the infrequent ones. + Every 10 million word types need about 1GB of RAM, set to `None` for no limit. + min_count : int + Words with frequency lower than this limit will be discarded from the vocabulary. + sample : float, optional + The threshold for configuring which higher-frequency words are randomly downsampled, + useful range is (0, 1e-5). + sorted_vocab : bool + If True, sort the vocabulary by descending frequency before assigning word indexes. + null_word : {0, 1} + If True, a null pseudo-word will be created for padding when using concatenative L1 (run-of-words). + This word is only ever input – never predicted – so count, huffman-point, etc doesn't matter. + + """ super(Doc2VecVocab, self).__init__( max_vocab_size=max_vocab_size, min_count=min_count, sample=sample, sorted_vocab=sorted_vocab, null_word=null_word) def scan_vocab(self, documents, docvecs, progress_per=10000, trim_rule=None): + """Create the model's vocabulary: A mapping from unique words in the corpus to their frequency count. + + Parameters + ---------- + documents : iterable of :class:`~gensim.models.doc2vec.TaggedDocument` + The tagged documents used to create the vocabulary. Their tags can be either str tokens or ints (faster). + docvecs : list of :class:`~gensim.models.keyedvectors.Doc2VecKeyedVectors` + The vector representations of the documents in our corpus. Each of them has a size == `vector_size`. + progress_per : int + Progress will be logged every `progress_per` documents. + trim_rule : function, optional + Vocabulary trimming rule, specifies whether certain words should remain in the vocabulary, + be trimmed away, or handled using the default (discard if word count < min_count). 
+ Can be None (min_count will be used, look to :func:`~gensim.utils.keep_vocab_item`), + or a callable that accepts parameters (word, count, min_count) and returns either + :attr:`gensim.utils.RULE_DISCARD`, :attr:`gensim.utils.RULE_KEEP` or :attr:`gensim.utils.RULE_DEFAULT`. + The rule, if given, is only used to prune vocabulary during build_vocab() and is not stored as part of the + model. + + The input parameters are of the following types: + * `word` (str) - the word we are examining + * `count` (int) - the word's frequency count in the corpus + * `min_count` (int) - the minimum count threshold. + + Returns + ------- + (int, int) + Tuple of (Total words in the corpus, number of documents) + + """ logger.info("collecting all words and their counts") document_no = -1 total_words = 0 @@ -843,7 +1167,20 @@ def scan_vocab(self, documents, docvecs, progress_per=10000, trim_rule=None): return total_words, corpus_count def note_doctag(self, key, document_no, document_length, docvecs): - """Note a document tag during initial corpus scan, for structure sizing.""" + """Note a document tag during initial corpus scan, for correctly setting the keyedvectors size. + + Parameters + ---------- + key : {int, str} + The tag to be noted. + document_no : int + The document's index in `docvecs`. Unused. + document_length : int + The document's length in words. + docvecs : list of :class:`~gensim.models.keyedvectors.Doc2VecKeyedVectors` + Vector representations of the documents in the corpus. Each vector has size == `vector_size` + + """ if isinstance(key, integer_types + (integer,)): docvecs.max_rawint = max(docvecs.max_rawint, key) else: @@ -855,12 +1192,41 @@ def note_doctag(self, key, document_no, document_length, docvecs): docvecs.count = docvecs.max_rawint + 1 + len(docvecs.offset2doctag) def indexed_doctags(self, doctag_tokens, docvecs): - """Return indexes and backing-arrays used in training examples.""" + """Get the indexes and backing-arrays used in training examples. 
+ + Parameters + ---------- + doctag_tokens : list of {str, int} + A list of tags for which we want the index. + docvecs : list of :class:`~gensim.models.keyedvectors.Doc2VecKeyedVectors` + Vector representations of the documents in the corpus. Each vector has size == `vector_size` + + Returns + ------- + list of int + Indices of the provided tag keys. + + """ return [ Doc2VecKeyedVectors._int_index(index, docvecs.doctags, docvecs.max_rawint) for index in doctag_tokens if self._tag_seen(index, docvecs)] def _tag_seen(self, index, docvecs): + """Whether or not the tag exists in our Vocabulary. + + Parameters + ---------- + index : {str, int} + The tag to be checked. + docvecs : :class:`~gensim.models.keyedvectors.Doc2VecKeyedVectors` + Vector representations of the documents in the corpus. Each vector has size == `vector_size` + + Returns + ------- + bool + Whether or not the passed tag exists in our vocabulary. + + """ if isinstance(index, integer_types + (integer,)): return index < docvecs.count else: @@ -868,6 +1234,7 @@ def _tag_seen(self, index, docvecs): class Doc2VecTrainables(Word2VecTrainables): + """Represents the inner shallow neural network used to train :class:`~gensim.models.doc2vec.Doc2Vec`.""" def __init__(self, dm=1, dm_concat=0, dm_tag_count=1, vector_size=100, seed=1, hashfxn=hash, window=5): super(Doc2VecTrainables, self).__init__( vector_size=vector_size, seed=seed, hashfxn=hashfxn) @@ -915,13 +1282,28 @@ def get_doctag_trainables(self, doc_words, vector_size): class TaggedBrownCorpus(object): - """Iterate over documents from the Brown corpus (part of NLTK data), yielding - each document out as a TaggedDocument object.""" + """Reader for the `Brown corpus (part of NLTK data) `_.""" def __init__(self, dirname): + """ + + Parameters + ---------- + dirname : str + Path to folder with Brown corpus. + + """ self.dirname = dirname def __iter__(self): + """Iterate through the corpus. 
+ + Yields + ------ + :class:`~gensim.models.doc2vec.TaggedDocument` + Document from `source`. + + """ for fname in os.listdir(self.dirname): fname = os.path.join(self.dirname, fname) if not os.path.isfile(fname): @@ -939,29 +1321,40 @@ def __iter__(self): class TaggedLineDocument(object): - """Simple format: one document = one line = one TaggedDocument object. + """Simple reader for format: one document = one line = one :class:`~gensim.models.doc2vec.TaggedDocument` object. - Words are expected to be already preprocessed and separated by whitespace, - tags are constructed automatically from the document line number.""" + Words are expected to be already preprocessed and separated by whitespace, tags are constructed automatically + from the document line number. + """ def __init__(self, source): """ - `source` can be either a string (filename) or a file object. - Example:: - - documents = TaggedLineDocument('myfile.txt') - - Or for compressed files:: + Parameters + ---------- + source : str + Path to source file. - documents = TaggedLineDocument('compressed_text.txt.bz2') - documents = TaggedLineDocument('compressed_text.txt.gz') + Examples + -------- + >>> from gensim.test.utils import datapath + >>> from gensim.models.doc2vec import TaggedLineDocument + >>> + >>> for document in TaggedLineDocument(datapath("head500.noblanks.cor")): + ... pass """ self.source = source def __iter__(self): - """Iterate through the lines in the source.""" + """Iterate through the lines in the source. + + Yields + ------ + :class:`~gensim.models.doc2vec.TaggedDocument` + Document from `source`. 
+ + """ try: # Assume it is a file-like object and try treating it as such # Things that don't have seek will trigger an exception diff --git a/gensim/models/doc2vec_inner.c b/gensim/models/doc2vec_inner.c index 103f34da81..3df79dfaee 100644 --- a/gensim/models/doc2vec_inner.c +++ b/gensim/models/doc2vec_inner.c @@ -524,7 +524,6 @@ static CYTHON_INLINE float __PYX_NAN() { #include #include "numpy/arrayobject.h" #include "numpy/ufuncobject.h" -#include #include "voidptr.h" #ifdef _OPENMP #include @@ -768,7 +767,7 @@ static const char *__pyx_f[] = { #endif -/* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":743 +/* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":743 * # in Cython to enable them only on the right systems. * * ctypedef npy_int8 int8_t # <<<<<<<<<<<<<< @@ -777,7 +776,7 @@ static const char *__pyx_f[] = { */ typedef npy_int8 __pyx_t_5numpy_int8_t; -/* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":744 +/* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":744 * * ctypedef npy_int8 int8_t * ctypedef npy_int16 int16_t # <<<<<<<<<<<<<< @@ -786,7 +785,7 @@ typedef npy_int8 __pyx_t_5numpy_int8_t; */ typedef npy_int16 __pyx_t_5numpy_int16_t; -/* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":745 +/* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":745 * ctypedef npy_int8 int8_t * ctypedef npy_int16 int16_t * ctypedef npy_int32 int32_t # <<<<<<<<<<<<<< @@ -795,7 +794,7 @@ typedef npy_int16 __pyx_t_5numpy_int16_t; */ typedef npy_int32 __pyx_t_5numpy_int32_t; -/* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":746 +/* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":746 * ctypedef npy_int16 int16_t * ctypedef npy_int32 int32_t * ctypedef npy_int64 int64_t # <<<<<<<<<<<<<< @@ -804,7 +803,7 @@ typedef 
npy_int32 __pyx_t_5numpy_int32_t; */ typedef npy_int64 __pyx_t_5numpy_int64_t; -/* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":750 +/* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":750 * #ctypedef npy_int128 int128_t * * ctypedef npy_uint8 uint8_t # <<<<<<<<<<<<<< @@ -813,7 +812,7 @@ typedef npy_int64 __pyx_t_5numpy_int64_t; */ typedef npy_uint8 __pyx_t_5numpy_uint8_t; -/* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":751 +/* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":751 * * ctypedef npy_uint8 uint8_t * ctypedef npy_uint16 uint16_t # <<<<<<<<<<<<<< @@ -822,7 +821,7 @@ typedef npy_uint8 __pyx_t_5numpy_uint8_t; */ typedef npy_uint16 __pyx_t_5numpy_uint16_t; -/* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":752 +/* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":752 * ctypedef npy_uint8 uint8_t * ctypedef npy_uint16 uint16_t * ctypedef npy_uint32 uint32_t # <<<<<<<<<<<<<< @@ -831,7 +830,7 @@ typedef npy_uint16 __pyx_t_5numpy_uint16_t; */ typedef npy_uint32 __pyx_t_5numpy_uint32_t; -/* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":753 +/* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":753 * ctypedef npy_uint16 uint16_t * ctypedef npy_uint32 uint32_t * ctypedef npy_uint64 uint64_t # <<<<<<<<<<<<<< @@ -840,7 +839,7 @@ typedef npy_uint32 __pyx_t_5numpy_uint32_t; */ typedef npy_uint64 __pyx_t_5numpy_uint64_t; -/* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":757 +/* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":757 * #ctypedef npy_uint128 uint128_t * * ctypedef npy_float32 float32_t # <<<<<<<<<<<<<< @@ -849,7 +848,7 @@ typedef npy_uint64 __pyx_t_5numpy_uint64_t; */ typedef npy_float32 __pyx_t_5numpy_float32_t; -/* 
"../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":758 +/* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":758 * * ctypedef npy_float32 float32_t * ctypedef npy_float64 float64_t # <<<<<<<<<<<<<< @@ -858,7 +857,7 @@ typedef npy_float32 __pyx_t_5numpy_float32_t; */ typedef npy_float64 __pyx_t_5numpy_float64_t; -/* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":767 +/* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":767 * # The int types are mapped a bit surprising -- * # numpy.int corresponds to 'l' and numpy.long to 'q' * ctypedef npy_long int_t # <<<<<<<<<<<<<< @@ -867,7 +866,7 @@ typedef npy_float64 __pyx_t_5numpy_float64_t; */ typedef npy_long __pyx_t_5numpy_int_t; -/* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":768 +/* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":768 * # numpy.int corresponds to 'l' and numpy.long to 'q' * ctypedef npy_long int_t * ctypedef npy_longlong long_t # <<<<<<<<<<<<<< @@ -876,7 +875,7 @@ typedef npy_long __pyx_t_5numpy_int_t; */ typedef npy_longlong __pyx_t_5numpy_long_t; -/* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":769 +/* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":769 * ctypedef npy_long int_t * ctypedef npy_longlong long_t * ctypedef npy_longlong longlong_t # <<<<<<<<<<<<<< @@ -885,7 +884,7 @@ typedef npy_longlong __pyx_t_5numpy_long_t; */ typedef npy_longlong __pyx_t_5numpy_longlong_t; -/* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":771 +/* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":771 * ctypedef npy_longlong longlong_t * * ctypedef npy_ulong uint_t # <<<<<<<<<<<<<< @@ -894,7 +893,7 @@ typedef npy_longlong __pyx_t_5numpy_longlong_t; */ typedef npy_ulong __pyx_t_5numpy_uint_t; -/* 
"../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":772 +/* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":772 * * ctypedef npy_ulong uint_t * ctypedef npy_ulonglong ulong_t # <<<<<<<<<<<<<< @@ -903,7 +902,7 @@ typedef npy_ulong __pyx_t_5numpy_uint_t; */ typedef npy_ulonglong __pyx_t_5numpy_ulong_t; -/* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":773 +/* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":773 * ctypedef npy_ulong uint_t * ctypedef npy_ulonglong ulong_t * ctypedef npy_ulonglong ulonglong_t # <<<<<<<<<<<<<< @@ -912,7 +911,7 @@ typedef npy_ulonglong __pyx_t_5numpy_ulong_t; */ typedef npy_ulonglong __pyx_t_5numpy_ulonglong_t; -/* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":775 +/* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":775 * ctypedef npy_ulonglong ulonglong_t * * ctypedef npy_intp intp_t # <<<<<<<<<<<<<< @@ -921,7 +920,7 @@ typedef npy_ulonglong __pyx_t_5numpy_ulonglong_t; */ typedef npy_intp __pyx_t_5numpy_intp_t; -/* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":776 +/* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":776 * * ctypedef npy_intp intp_t * ctypedef npy_uintp uintp_t # <<<<<<<<<<<<<< @@ -930,7 +929,7 @@ typedef npy_intp __pyx_t_5numpy_intp_t; */ typedef npy_uintp __pyx_t_5numpy_uintp_t; -/* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":778 +/* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":778 * ctypedef npy_uintp uintp_t * * ctypedef npy_double float_t # <<<<<<<<<<<<<< @@ -939,7 +938,7 @@ typedef npy_uintp __pyx_t_5numpy_uintp_t; */ typedef npy_double __pyx_t_5numpy_float_t; -/* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":779 +/* 
"../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":779 * * ctypedef npy_double float_t * ctypedef npy_double double_t # <<<<<<<<<<<<<< @@ -948,7 +947,7 @@ typedef npy_double __pyx_t_5numpy_float_t; */ typedef npy_double __pyx_t_5numpy_double_t; -/* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":780 +/* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":780 * ctypedef npy_double float_t * ctypedef npy_double double_t * ctypedef npy_longdouble longdouble_t # <<<<<<<<<<<<<< @@ -992,7 +991,7 @@ static CYTHON_INLINE __pyx_t_double_complex __pyx_t_double_complex_from_parts(do /*--- Type declarations ---*/ -/* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":782 +/* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":782 * ctypedef npy_longdouble longdouble_t * * ctypedef npy_cfloat cfloat_t # <<<<<<<<<<<<<< @@ -1001,7 +1000,7 @@ static CYTHON_INLINE __pyx_t_double_complex __pyx_t_double_complex_from_parts(do */ typedef npy_cfloat __pyx_t_5numpy_cfloat_t; -/* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":783 +/* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":783 * * ctypedef npy_cfloat cfloat_t * ctypedef npy_cdouble cdouble_t # <<<<<<<<<<<<<< @@ -1010,7 +1009,7 @@ typedef npy_cfloat __pyx_t_5numpy_cfloat_t; */ typedef npy_cdouble __pyx_t_5numpy_cdouble_t; -/* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":784 +/* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":784 * ctypedef npy_cfloat cfloat_t * ctypedef npy_cdouble cdouble_t * ctypedef npy_clongdouble clongdouble_t # <<<<<<<<<<<<<< @@ -1019,7 +1018,7 @@ typedef npy_cdouble __pyx_t_5numpy_cdouble_t; */ typedef npy_clongdouble __pyx_t_5numpy_clongdouble_t; -/* 
"../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":786 +/* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":786 * ctypedef npy_clongdouble clongdouble_t * * ctypedef npy_cdouble complex_t # <<<<<<<<<<<<<< @@ -1578,8 +1577,6 @@ static PyTypeObject *__pyx_ptype_5numpy_ndarray = 0; static PyTypeObject *__pyx_ptype_5numpy_ufunc = 0; static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *, char *, char *, int *); /*proto*/ -/* Module declarations from 'libc.math' */ - /* Module declarations from 'gensim.models.word2vec_inner' */ static __pyx_t_6gensim_6models_14word2vec_inner_scopy_ptr *__pyx_vp_6gensim_6models_14word2vec_inner_scopy = 0; #define __pyx_v_6gensim_6models_14word2vec_inner_scopy (*__pyx_vp_6gensim_6models_14word2vec_inner_scopy) @@ -1599,10 +1596,6 @@ static __pyx_t_6gensim_6models_14word2vec_inner_our_dot_ptr *__pyx_vp_6gensim_6m #define __pyx_v_6gensim_6models_14word2vec_inner_our_dot (*__pyx_vp_6gensim_6models_14word2vec_inner_our_dot) static __pyx_t_6gensim_6models_14word2vec_inner_our_saxpy_ptr *__pyx_vp_6gensim_6models_14word2vec_inner_our_saxpy = 0; #define __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy (*__pyx_vp_6gensim_6models_14word2vec_inner_our_saxpy) -static __pyx_t_6gensim_6models_14word2vec_inner_REAL_t (*__pyx_f_6gensim_6models_14word2vec_inner_our_dot_double)(int const *, float const *, int const *, float const *, int const *); /*proto*/ -static __pyx_t_6gensim_6models_14word2vec_inner_REAL_t (*__pyx_f_6gensim_6models_14word2vec_inner_our_dot_float)(int const *, float const *, int const *, float const *, int const *); /*proto*/ -static __pyx_t_6gensim_6models_14word2vec_inner_REAL_t (*__pyx_f_6gensim_6models_14word2vec_inner_our_dot_noblas)(int const *, float const *, int const *, float const *, int const *); /*proto*/ -static void (*__pyx_f_6gensim_6models_14word2vec_inner_our_saxpy_noblas)(int const *, float const *, float const *, int const *, float 
*, int const *); /*proto*/ static unsigned PY_LONG_LONG (*__pyx_f_6gensim_6models_14word2vec_inner_bisect_left)(__pyx_t_5numpy_uint32_t *, unsigned PY_LONG_LONG, unsigned PY_LONG_LONG, unsigned PY_LONG_LONG); /*proto*/ static unsigned PY_LONG_LONG (*__pyx_f_6gensim_6models_14word2vec_inner_random_int32)(unsigned PY_LONG_LONG *); /*proto*/ @@ -1676,7 +1669,6 @@ static const char __pyx_k_vectors[] = "vectors"; static const char __pyx_k_vlookup[] = "vlookup"; static const char __pyx_k_codelens[] = "codelens"; static const char __pyx_k_negative[] = "negative"; -static const char __pyx_k_word2vec[] = "word2vec"; static const char __pyx_k_cbow_mean[] = "cbow_mean"; static const char __pyx_k_cum_table[] = "cum_table"; static const char __pyx_k_doc_words[] = "doc_words"; @@ -1694,7 +1686,6 @@ static const char __pyx_k_learn_words[] = "learn_words"; static const char __pyx_k_next_random[] = "next_random"; static const char __pyx_k_train_words[] = "train_words"; static const char __pyx_k_vector_size[] = "vector_size"; -static const char __pyx_k_FAST_VERSION[] = "FAST_VERSION"; static const char __pyx_k_RuntimeError[] = "RuntimeError"; static const char __pyx_k_dm_tag_count[] = "dm_tag_count"; static const char __pyx_k_doctag_locks[] = "doctag_locks"; @@ -1734,10 +1725,10 @@ static const char __pyx_k_numpy_core_multiarray_failed_to[] = "numpy.core.multia static const char __pyx_k_unknown_dtype_code_in_numpy_pxd[] = "unknown dtype code in numpy.pxd (%d)"; static const char __pyx_k_Format_string_allocated_too_shor[] = "Format string allocated too short, see comment in numpy.pxd"; static const char __pyx_k_Non_native_byte_order_not_suppor[] = "Non-native byte order not supported"; +static const char __pyx_k_Optimized_cython_functions_for_t[] = "Optimized cython functions for training :class:`~gensim.models.doc2vec.Doc2Vec` model."; static const char __pyx_k_ndarray_is_not_Fortran_contiguou[] = "ndarray is not Fortran contiguous"; static const char 
__pyx_k_numpy_core_umath_failed_to_impor[] = "numpy.core.umath failed to import"; static const char __pyx_k_Format_string_allocated_too_shor_2[] = "Format string allocated too short."; -static PyObject *__pyx_n_s_FAST_VERSION; static PyObject *__pyx_kp_u_Format_string_allocated_too_shor; static PyObject *__pyx_kp_u_Format_string_allocated_too_shor_2; static PyObject *__pyx_n_s_ImportError; @@ -1840,7 +1831,6 @@ static PyObject *__pyx_n_s_vocab; static PyObject *__pyx_n_s_vocabulary; static PyObject *__pyx_n_s_window; static PyObject *__pyx_n_s_window_indexes; -static PyObject *__pyx_n_s_word2vec; static PyObject *__pyx_n_s_word_locks; static PyObject *__pyx_n_s_word_locks_2; static PyObject *__pyx_n_s_word_vectors; @@ -1878,7 +1868,7 @@ static PyObject *__pyx_codeobj__19; static PyObject *__pyx_codeobj__21; static PyObject *__pyx_codeobj__23; -/* "gensim/models/doc2vec_inner.pyx":41 +/* "gensim/models/doc2vec_inner.pyx":35 * DEF MAX_EXP = 6 * * cdef void fast_document_dbow_hs( # <<<<<<<<<<<<<< @@ -1897,7 +1887,7 @@ static void __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dbow_hs(__pyx_ int __pyx_t_3; int __pyx_t_4; - /* "gensim/models/doc2vec_inner.pyx":48 + /* "gensim/models/doc2vec_inner.pyx":42 * * cdef long long a, b * cdef long long row1 = context_index * size, row2 # <<<<<<<<<<<<<< @@ -1906,7 +1896,7 @@ static void __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dbow_hs(__pyx_ */ __pyx_v_row1 = (__pyx_v_context_index * __pyx_v_size); - /* "gensim/models/doc2vec_inner.pyx":51 + /* "gensim/models/doc2vec_inner.pyx":45 * cdef REAL_t f, g * * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< @@ -1915,7 +1905,7 @@ static void __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dbow_hs(__pyx_ */ memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); - /* "gensim/models/doc2vec_inner.pyx":52 + /* "gensim/models/doc2vec_inner.pyx":46 * * memset(work, 0, size * cython.sizeof(REAL_t)) * for b 
in range(codelen): # <<<<<<<<<<<<<< @@ -1926,7 +1916,7 @@ static void __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dbow_hs(__pyx_ for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_b = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":53 + /* "gensim/models/doc2vec_inner.pyx":47 * memset(work, 0, size * cython.sizeof(REAL_t)) * for b in range(codelen): * row2 = word_point[b] * size # <<<<<<<<<<<<<< @@ -1935,7 +1925,7 @@ static void __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dbow_hs(__pyx_ */ __pyx_v_row2 = ((__pyx_v_word_point[__pyx_v_b]) * __pyx_v_size); - /* "gensim/models/doc2vec_inner.pyx":54 + /* "gensim/models/doc2vec_inner.pyx":48 * for b in range(codelen): * row2 = word_point[b] * size * f = our_dot(&size, &context_vectors[row1], &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< @@ -1944,7 +1934,7 @@ static void __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dbow_hs(__pyx_ */ __pyx_v_f = __pyx_v_6gensim_6models_14word2vec_inner_our_dot((&__pyx_v_size), (&(__pyx_v_context_vectors[__pyx_v_row1])), (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE)); - /* "gensim/models/doc2vec_inner.pyx":55 + /* "gensim/models/doc2vec_inner.pyx":49 * row2 = word_point[b] * size * f = our_dot(&size, &context_vectors[row1], &ONE, &syn1[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< @@ -1962,7 +1952,7 @@ static void __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dbow_hs(__pyx_ __pyx_L6_bool_binop_done:; if (__pyx_t_3) { - /* "gensim/models/doc2vec_inner.pyx":56 + /* "gensim/models/doc2vec_inner.pyx":50 * f = our_dot(&size, &context_vectors[row1], &ONE, &syn1[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: * continue # <<<<<<<<<<<<<< @@ -1971,7 +1961,7 @@ static void __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dbow_hs(__pyx_ */ goto __pyx_L3_continue; - /* "gensim/models/doc2vec_inner.pyx":55 + /* "gensim/models/doc2vec_inner.pyx":49 
* row2 = word_point[b] * size * f = our_dot(&size, &context_vectors[row1], &ONE, &syn1[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< @@ -1980,7 +1970,7 @@ static void __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dbow_hs(__pyx_ */ } - /* "gensim/models/doc2vec_inner.pyx":57 + /* "gensim/models/doc2vec_inner.pyx":51 * if f <= -MAX_EXP or f >= MAX_EXP: * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< @@ -1989,7 +1979,7 @@ static void __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dbow_hs(__pyx_ */ __pyx_v_f = (__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[((int)((__pyx_v_f + 6.0) * 83.0))]); - /* "gensim/models/doc2vec_inner.pyx":58 + /* "gensim/models/doc2vec_inner.pyx":52 * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (1 - word_code[b] - f) * alpha # <<<<<<<<<<<<<< @@ -1998,7 +1988,7 @@ static void __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dbow_hs(__pyx_ */ __pyx_v_g = (((1 - (__pyx_v_word_code[__pyx_v_b])) - __pyx_v_f) * __pyx_v_alpha); - /* "gensim/models/doc2vec_inner.pyx":59 + /* "gensim/models/doc2vec_inner.pyx":53 * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (1 - word_code[b] - f) * alpha * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< @@ -2007,7 +1997,7 @@ static void __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dbow_hs(__pyx_ */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_work, (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE)); - /* "gensim/models/doc2vec_inner.pyx":60 + /* "gensim/models/doc2vec_inner.pyx":54 * g = (1 - word_code[b] - f) * alpha * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) * if learn_hidden: # <<<<<<<<<<<<<< @@ -2017,7 +2007,7 @@ static void __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dbow_hs(__pyx_ __pyx_t_3 
= (__pyx_v_learn_hidden != 0); if (__pyx_t_3) { - /* "gensim/models/doc2vec_inner.pyx":61 + /* "gensim/models/doc2vec_inner.pyx":55 * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) * if learn_hidden: * our_saxpy(&size, &g, &context_vectors[row1], &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< @@ -2026,7 +2016,7 @@ static void __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dbow_hs(__pyx_ */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_context_vectors[__pyx_v_row1])), (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE)); - /* "gensim/models/doc2vec_inner.pyx":60 + /* "gensim/models/doc2vec_inner.pyx":54 * g = (1 - word_code[b] - f) * alpha * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) * if learn_hidden: # <<<<<<<<<<<<<< @@ -2037,7 +2027,7 @@ static void __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dbow_hs(__pyx_ __pyx_L3_continue:; } - /* "gensim/models/doc2vec_inner.pyx":62 + /* "gensim/models/doc2vec_inner.pyx":56 * if learn_hidden: * our_saxpy(&size, &g, &context_vectors[row1], &ONE, &syn1[row2], &ONE) * if learn_context: # <<<<<<<<<<<<<< @@ -2047,7 +2037,7 @@ static void __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dbow_hs(__pyx_ __pyx_t_3 = (__pyx_v_learn_context != 0); if (__pyx_t_3) { - /* "gensim/models/doc2vec_inner.pyx":63 + /* "gensim/models/doc2vec_inner.pyx":57 * our_saxpy(&size, &g, &context_vectors[row1], &ONE, &syn1[row2], &ONE) * if learn_context: * our_saxpy(&size, &context_locks[context_index], work, &ONE, &context_vectors[row1], &ONE) # <<<<<<<<<<<<<< @@ -2056,7 +2046,7 @@ static void __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dbow_hs(__pyx_ */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_context_locks[__pyx_v_context_index])), __pyx_v_work, (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_context_vectors[__pyx_v_row1])), 
(&__pyx_v_6gensim_6models_13doc2vec_inner_ONE)); - /* "gensim/models/doc2vec_inner.pyx":62 + /* "gensim/models/doc2vec_inner.pyx":56 * if learn_hidden: * our_saxpy(&size, &g, &context_vectors[row1], &ONE, &syn1[row2], &ONE) * if learn_context: # <<<<<<<<<<<<<< @@ -2065,7 +2055,7 @@ static void __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dbow_hs(__pyx_ */ } - /* "gensim/models/doc2vec_inner.pyx":41 + /* "gensim/models/doc2vec_inner.pyx":35 * DEF MAX_EXP = 6 * * cdef void fast_document_dbow_hs( # <<<<<<<<<<<<<< @@ -2076,7 +2066,7 @@ static void __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dbow_hs(__pyx_ /* function exit code */ } -/* "gensim/models/doc2vec_inner.pyx":66 +/* "gensim/models/doc2vec_inner.pyx":60 * * * cdef unsigned long long fast_document_dbow_neg( # <<<<<<<<<<<<<< @@ -2099,7 +2089,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume int __pyx_t_3; int __pyx_t_4; - /* "gensim/models/doc2vec_inner.pyx":73 + /* "gensim/models/doc2vec_inner.pyx":67 * * cdef long long a * cdef long long row1 = context_index * size, row2 # <<<<<<<<<<<<<< @@ -2108,7 +2098,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume */ __pyx_v_row1 = (__pyx_v_context_index * __pyx_v_size); - /* "gensim/models/doc2vec_inner.pyx":74 + /* "gensim/models/doc2vec_inner.pyx":68 * cdef long long a * cdef long long row1 = context_index * size, row2 * cdef unsigned long long modulo = 281474976710655ULL # <<<<<<<<<<<<<< @@ -2117,7 +2107,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume */ __pyx_v_modulo = 281474976710655ULL; - /* "gensim/models/doc2vec_inner.pyx":79 + /* "gensim/models/doc2vec_inner.pyx":73 * cdef int d * * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< @@ -2126,7 +2116,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume */ memset(__pyx_v_work, 0, (__pyx_v_size * 
(sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); - /* "gensim/models/doc2vec_inner.pyx":81 + /* "gensim/models/doc2vec_inner.pyx":75 * memset(work, 0, size * cython.sizeof(REAL_t)) * * for d in range(negative+1): # <<<<<<<<<<<<<< @@ -2137,7 +2127,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_d = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":82 + /* "gensim/models/doc2vec_inner.pyx":76 * * for d in range(negative+1): * if d == 0: # <<<<<<<<<<<<<< @@ -2147,7 +2137,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume __pyx_t_3 = ((__pyx_v_d == 0) != 0); if (__pyx_t_3) { - /* "gensim/models/doc2vec_inner.pyx":83 + /* "gensim/models/doc2vec_inner.pyx":77 * for d in range(negative+1): * if d == 0: * target_index = word_index # <<<<<<<<<<<<<< @@ -2156,7 +2146,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume */ __pyx_v_target_index = __pyx_v_word_index; - /* "gensim/models/doc2vec_inner.pyx":84 + /* "gensim/models/doc2vec_inner.pyx":78 * if d == 0: * target_index = word_index * label = ONEF # <<<<<<<<<<<<<< @@ -2165,7 +2155,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume */ __pyx_v_label = __pyx_v_6gensim_6models_13doc2vec_inner_ONEF; - /* "gensim/models/doc2vec_inner.pyx":82 + /* "gensim/models/doc2vec_inner.pyx":76 * * for d in range(negative+1): * if d == 0: # <<<<<<<<<<<<<< @@ -2175,7 +2165,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume goto __pyx_L5; } - /* "gensim/models/doc2vec_inner.pyx":86 + /* "gensim/models/doc2vec_inner.pyx":80 * label = ONEF * else: * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) # <<<<<<<<<<<<<< @@ -2185,7 +2175,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume /*else*/ { 
__pyx_v_target_index = __pyx_f_6gensim_6models_14word2vec_inner_bisect_left(__pyx_v_cum_table, ((__pyx_v_next_random >> 16) % (__pyx_v_cum_table[(__pyx_v_cum_table_len - 1)])), 0, __pyx_v_cum_table_len); - /* "gensim/models/doc2vec_inner.pyx":87 + /* "gensim/models/doc2vec_inner.pyx":81 * else: * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) * next_random = (next_random * 25214903917ULL + 11) & modulo # <<<<<<<<<<<<<< @@ -2194,7 +2184,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume */ __pyx_v_next_random = (((__pyx_v_next_random * ((unsigned PY_LONG_LONG)25214903917ULL)) + 11) & __pyx_v_modulo); - /* "gensim/models/doc2vec_inner.pyx":88 + /* "gensim/models/doc2vec_inner.pyx":82 * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) * next_random = (next_random * 25214903917ULL + 11) & modulo * if target_index == word_index: # <<<<<<<<<<<<<< @@ -2204,7 +2194,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume __pyx_t_3 = ((__pyx_v_target_index == __pyx_v_word_index) != 0); if (__pyx_t_3) { - /* "gensim/models/doc2vec_inner.pyx":89 + /* "gensim/models/doc2vec_inner.pyx":83 * next_random = (next_random * 25214903917ULL + 11) & modulo * if target_index == word_index: * continue # <<<<<<<<<<<<<< @@ -2213,7 +2203,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume */ goto __pyx_L3_continue; - /* "gensim/models/doc2vec_inner.pyx":88 + /* "gensim/models/doc2vec_inner.pyx":82 * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) * next_random = (next_random * 25214903917ULL + 11) & modulo * if target_index == word_index: # <<<<<<<<<<<<<< @@ -2222,7 +2212,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume */ } - /* "gensim/models/doc2vec_inner.pyx":90 + /* 
"gensim/models/doc2vec_inner.pyx":84 * if target_index == word_index: * continue * label = 0.0 # <<<<<<<<<<<<<< @@ -2233,7 +2223,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume } __pyx_L5:; - /* "gensim/models/doc2vec_inner.pyx":91 + /* "gensim/models/doc2vec_inner.pyx":85 * continue * label = 0.0 * row2 = target_index * size # <<<<<<<<<<<<<< @@ -2242,7 +2232,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume */ __pyx_v_row2 = (__pyx_v_target_index * __pyx_v_size); - /* "gensim/models/doc2vec_inner.pyx":92 + /* "gensim/models/doc2vec_inner.pyx":86 * label = 0.0 * row2 = target_index * size * f = our_dot(&size, &context_vectors[row1], &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< @@ -2251,7 +2241,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume */ __pyx_v_f = __pyx_v_6gensim_6models_14word2vec_inner_our_dot((&__pyx_v_size), (&(__pyx_v_context_vectors[__pyx_v_row1])), (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE)); - /* "gensim/models/doc2vec_inner.pyx":93 + /* "gensim/models/doc2vec_inner.pyx":87 * row2 = target_index * size * f = our_dot(&size, &context_vectors[row1], &ONE, &syn1neg[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< @@ -2269,7 +2259,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume __pyx_L8_bool_binop_done:; if (__pyx_t_3) { - /* "gensim/models/doc2vec_inner.pyx":94 + /* "gensim/models/doc2vec_inner.pyx":88 * f = our_dot(&size, &context_vectors[row1], &ONE, &syn1neg[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: * continue # <<<<<<<<<<<<<< @@ -2278,7 +2268,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume */ goto __pyx_L3_continue; - /* "gensim/models/doc2vec_inner.pyx":93 + /* "gensim/models/doc2vec_inner.pyx":87 * row2 = target_index * size * f = our_dot(&size, 
&context_vectors[row1], &ONE, &syn1neg[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< @@ -2287,7 +2277,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume */ } - /* "gensim/models/doc2vec_inner.pyx":95 + /* "gensim/models/doc2vec_inner.pyx":89 * if f <= -MAX_EXP or f >= MAX_EXP: * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< @@ -2296,7 +2286,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume */ __pyx_v_f = (__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[((int)((__pyx_v_f + 6.0) * 83.0))]); - /* "gensim/models/doc2vec_inner.pyx":96 + /* "gensim/models/doc2vec_inner.pyx":90 * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (label - f) * alpha # <<<<<<<<<<<<<< @@ -2305,7 +2295,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume */ __pyx_v_g = ((__pyx_v_label - __pyx_v_f) * __pyx_v_alpha); - /* "gensim/models/doc2vec_inner.pyx":97 + /* "gensim/models/doc2vec_inner.pyx":91 * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (label - f) * alpha * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< @@ -2314,7 +2304,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_work, (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE)); - /* "gensim/models/doc2vec_inner.pyx":98 + /* "gensim/models/doc2vec_inner.pyx":92 * g = (label - f) * alpha * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) * if learn_hidden: # <<<<<<<<<<<<<< @@ -2324,7 +2314,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume __pyx_t_3 = (__pyx_v_learn_hidden != 0); if (__pyx_t_3) { - /* "gensim/models/doc2vec_inner.pyx":99 + /* 
"gensim/models/doc2vec_inner.pyx":93 * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) * if learn_hidden: * our_saxpy(&size, &g, &context_vectors[row1], &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< @@ -2333,7 +2323,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_context_vectors[__pyx_v_row1])), (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE)); - /* "gensim/models/doc2vec_inner.pyx":98 + /* "gensim/models/doc2vec_inner.pyx":92 * g = (label - f) * alpha * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) * if learn_hidden: # <<<<<<<<<<<<<< @@ -2344,7 +2334,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume __pyx_L3_continue:; } - /* "gensim/models/doc2vec_inner.pyx":100 + /* "gensim/models/doc2vec_inner.pyx":94 * if learn_hidden: * our_saxpy(&size, &g, &context_vectors[row1], &ONE, &syn1neg[row2], &ONE) * if learn_context: # <<<<<<<<<<<<<< @@ -2354,7 +2344,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume __pyx_t_3 = (__pyx_v_learn_context != 0); if (__pyx_t_3) { - /* "gensim/models/doc2vec_inner.pyx":101 + /* "gensim/models/doc2vec_inner.pyx":95 * our_saxpy(&size, &g, &context_vectors[row1], &ONE, &syn1neg[row2], &ONE) * if learn_context: * our_saxpy(&size, &context_locks[context_index], work, &ONE, &context_vectors[row1], &ONE) # <<<<<<<<<<<<<< @@ -2363,7 +2353,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_context_locks[__pyx_v_context_index])), __pyx_v_work, (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_context_vectors[__pyx_v_row1])), (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE)); - /* "gensim/models/doc2vec_inner.pyx":100 + /* 
"gensim/models/doc2vec_inner.pyx":94 * if learn_hidden: * our_saxpy(&size, &g, &context_vectors[row1], &ONE, &syn1neg[row2], &ONE) * if learn_context: # <<<<<<<<<<<<<< @@ -2372,7 +2362,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume */ } - /* "gensim/models/doc2vec_inner.pyx":103 + /* "gensim/models/doc2vec_inner.pyx":97 * our_saxpy(&size, &context_locks[context_index], work, &ONE, &context_vectors[row1], &ONE) * * return next_random # <<<<<<<<<<<<<< @@ -2382,7 +2372,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume __pyx_r = __pyx_v_next_random; goto __pyx_L0; - /* "gensim/models/doc2vec_inner.pyx":66 + /* "gensim/models/doc2vec_inner.pyx":60 * * * cdef unsigned long long fast_document_dbow_neg( # <<<<<<<<<<<<<< @@ -2395,7 +2385,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume return __pyx_r; } -/* "gensim/models/doc2vec_inner.pyx":106 +/* "gensim/models/doc2vec_inner.pyx":100 * * * cdef void fast_document_dm_hs( # <<<<<<<<<<<<<< @@ -2413,7 +2403,7 @@ static void __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dm_hs(__pyx_t_ int __pyx_t_3; int __pyx_t_4; - /* "gensim/models/doc2vec_inner.pyx":117 + /* "gensim/models/doc2vec_inner.pyx":111 * # l1 already composed by caller, passed in as neu1 * # work (also passed in) will accumulate l1 error * for b in range(word_code_len): # <<<<<<<<<<<<<< @@ -2424,7 +2414,7 @@ static void __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dm_hs(__pyx_t_ for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_b = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":118 + /* "gensim/models/doc2vec_inner.pyx":112 * # work (also passed in) will accumulate l1 error * for b in range(word_code_len): * row2 = word_point[b] * size # <<<<<<<<<<<<<< @@ -2433,7 +2423,7 @@ static void __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dm_hs(__pyx_t_ */ __pyx_v_row2 = ((__pyx_v_word_point[__pyx_v_b]) * 
__pyx_v_size); - /* "gensim/models/doc2vec_inner.pyx":119 + /* "gensim/models/doc2vec_inner.pyx":113 * for b in range(word_code_len): * row2 = word_point[b] * size * f = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< @@ -2442,7 +2432,7 @@ static void __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dm_hs(__pyx_t_ */ __pyx_v_f = __pyx_v_6gensim_6models_14word2vec_inner_our_dot((&__pyx_v_size), __pyx_v_neu1, (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE)); - /* "gensim/models/doc2vec_inner.pyx":120 + /* "gensim/models/doc2vec_inner.pyx":114 * row2 = word_point[b] * size * f = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< @@ -2460,7 +2450,7 @@ static void __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dm_hs(__pyx_t_ __pyx_L6_bool_binop_done:; if (__pyx_t_3) { - /* "gensim/models/doc2vec_inner.pyx":121 + /* "gensim/models/doc2vec_inner.pyx":115 * f = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: * continue # <<<<<<<<<<<<<< @@ -2469,7 +2459,7 @@ static void __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dm_hs(__pyx_t_ */ goto __pyx_L3_continue; - /* "gensim/models/doc2vec_inner.pyx":120 + /* "gensim/models/doc2vec_inner.pyx":114 * row2 = word_point[b] * size * f = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< @@ -2478,7 +2468,7 @@ static void __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dm_hs(__pyx_t_ */ } - /* "gensim/models/doc2vec_inner.pyx":122 + /* "gensim/models/doc2vec_inner.pyx":116 * if f <= -MAX_EXP or f >= MAX_EXP: * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< @@ -2487,7 +2477,7 @@ static void __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dm_hs(__pyx_t_ */ __pyx_v_f = (__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[((int)((__pyx_v_f + 
6.0) * 83.0))]); - /* "gensim/models/doc2vec_inner.pyx":123 + /* "gensim/models/doc2vec_inner.pyx":117 * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (1 - word_code[b] - f) * alpha # <<<<<<<<<<<<<< @@ -2496,7 +2486,7 @@ static void __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dm_hs(__pyx_t_ */ __pyx_v_g = (((1 - (__pyx_v_word_code[__pyx_v_b])) - __pyx_v_f) * __pyx_v_alpha); - /* "gensim/models/doc2vec_inner.pyx":124 + /* "gensim/models/doc2vec_inner.pyx":118 * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (1 - word_code[b] - f) * alpha * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< @@ -2505,7 +2495,7 @@ static void __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dm_hs(__pyx_t_ */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_work, (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE)); - /* "gensim/models/doc2vec_inner.pyx":125 + /* "gensim/models/doc2vec_inner.pyx":119 * g = (1 - word_code[b] - f) * alpha * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) * if learn_hidden: # <<<<<<<<<<<<<< @@ -2515,7 +2505,7 @@ static void __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dm_hs(__pyx_t_ __pyx_t_3 = (__pyx_v_learn_hidden != 0); if (__pyx_t_3) { - /* "gensim/models/doc2vec_inner.pyx":126 + /* "gensim/models/doc2vec_inner.pyx":120 * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) * if learn_hidden: * our_saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< @@ -2524,7 +2514,7 @@ static void __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dm_hs(__pyx_t_ */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), __pyx_v_neu1, (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE)); - /* "gensim/models/doc2vec_inner.pyx":125 + /* 
"gensim/models/doc2vec_inner.pyx":119 * g = (1 - word_code[b] - f) * alpha * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) * if learn_hidden: # <<<<<<<<<<<<<< @@ -2535,7 +2525,7 @@ static void __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dm_hs(__pyx_t_ __pyx_L3_continue:; } - /* "gensim/models/doc2vec_inner.pyx":106 + /* "gensim/models/doc2vec_inner.pyx":100 * * * cdef void fast_document_dm_hs( # <<<<<<<<<<<<<< @@ -2546,7 +2536,7 @@ static void __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dm_hs(__pyx_t_ /* function exit code */ } -/* "gensim/models/doc2vec_inner.pyx":129 +/* "gensim/models/doc2vec_inner.pyx":123 * * * cdef unsigned long long fast_document_dm_neg( # <<<<<<<<<<<<<< @@ -2568,7 +2558,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume int __pyx_t_3; int __pyx_t_4; - /* "gensim/models/doc2vec_inner.pyx":135 + /* "gensim/models/doc2vec_inner.pyx":129 * * cdef long long row2 * cdef unsigned long long modulo = 281474976710655ULL # <<<<<<<<<<<<<< @@ -2577,7 +2567,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume */ __pyx_v_modulo = 281474976710655ULL; - /* "gensim/models/doc2vec_inner.pyx":142 + /* "gensim/models/doc2vec_inner.pyx":136 * # l1 already composed by caller, passed in as neu1 * # work (also passsed in) will accumulate l1 error for outside application * for d in range(negative+1): # <<<<<<<<<<<<<< @@ -2588,7 +2578,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_d = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":143 + /* "gensim/models/doc2vec_inner.pyx":137 * # work (also passsed in) will accumulate l1 error for outside application * for d in range(negative+1): * if d == 0: # <<<<<<<<<<<<<< @@ -2598,7 +2588,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume __pyx_t_3 = ((__pyx_v_d == 0) != 0); if (__pyx_t_3) { - 
/* "gensim/models/doc2vec_inner.pyx":144 + /* "gensim/models/doc2vec_inner.pyx":138 * for d in range(negative+1): * if d == 0: * target_index = predict_word_index # <<<<<<<<<<<<<< @@ -2607,7 +2597,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume */ __pyx_v_target_index = __pyx_v_predict_word_index; - /* "gensim/models/doc2vec_inner.pyx":145 + /* "gensim/models/doc2vec_inner.pyx":139 * if d == 0: * target_index = predict_word_index * label = ONEF # <<<<<<<<<<<<<< @@ -2616,7 +2606,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume */ __pyx_v_label = __pyx_v_6gensim_6models_13doc2vec_inner_ONEF; - /* "gensim/models/doc2vec_inner.pyx":143 + /* "gensim/models/doc2vec_inner.pyx":137 * # work (also passsed in) will accumulate l1 error for outside application * for d in range(negative+1): * if d == 0: # <<<<<<<<<<<<<< @@ -2626,7 +2616,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume goto __pyx_L5; } - /* "gensim/models/doc2vec_inner.pyx":147 + /* "gensim/models/doc2vec_inner.pyx":141 * label = ONEF * else: * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) # <<<<<<<<<<<<<< @@ -2636,7 +2626,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume /*else*/ { __pyx_v_target_index = __pyx_f_6gensim_6models_14word2vec_inner_bisect_left(__pyx_v_cum_table, ((__pyx_v_next_random >> 16) % (__pyx_v_cum_table[(__pyx_v_cum_table_len - 1)])), 0, __pyx_v_cum_table_len); - /* "gensim/models/doc2vec_inner.pyx":148 + /* "gensim/models/doc2vec_inner.pyx":142 * else: * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) * next_random = (next_random * 25214903917ULL + 11) & modulo # <<<<<<<<<<<<<< @@ -2645,7 +2635,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume */ __pyx_v_next_random = (((__pyx_v_next_random * 
((unsigned PY_LONG_LONG)25214903917ULL)) + 11) & __pyx_v_modulo); - /* "gensim/models/doc2vec_inner.pyx":149 + /* "gensim/models/doc2vec_inner.pyx":143 * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) * next_random = (next_random * 25214903917ULL + 11) & modulo * if target_index == predict_word_index: # <<<<<<<<<<<<<< @@ -2655,7 +2645,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume __pyx_t_3 = ((__pyx_v_target_index == __pyx_v_predict_word_index) != 0); if (__pyx_t_3) { - /* "gensim/models/doc2vec_inner.pyx":150 + /* "gensim/models/doc2vec_inner.pyx":144 * next_random = (next_random * 25214903917ULL + 11) & modulo * if target_index == predict_word_index: * continue # <<<<<<<<<<<<<< @@ -2664,7 +2654,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume */ goto __pyx_L3_continue; - /* "gensim/models/doc2vec_inner.pyx":149 + /* "gensim/models/doc2vec_inner.pyx":143 * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) * next_random = (next_random * 25214903917ULL + 11) & modulo * if target_index == predict_word_index: # <<<<<<<<<<<<<< @@ -2673,7 +2663,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume */ } - /* "gensim/models/doc2vec_inner.pyx":151 + /* "gensim/models/doc2vec_inner.pyx":145 * if target_index == predict_word_index: * continue * label = 0.0 # <<<<<<<<<<<<<< @@ -2684,7 +2674,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume } __pyx_L5:; - /* "gensim/models/doc2vec_inner.pyx":153 + /* "gensim/models/doc2vec_inner.pyx":147 * label = 0.0 * * row2 = target_index * size # <<<<<<<<<<<<<< @@ -2693,7 +2683,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume */ __pyx_v_row2 = (__pyx_v_target_index * __pyx_v_size); - /* "gensim/models/doc2vec_inner.pyx":154 + /* 
"gensim/models/doc2vec_inner.pyx":148 * * row2 = target_index * size * f = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< @@ -2702,7 +2692,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume */ __pyx_v_f = __pyx_v_6gensim_6models_14word2vec_inner_our_dot((&__pyx_v_size), __pyx_v_neu1, (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE)); - /* "gensim/models/doc2vec_inner.pyx":155 + /* "gensim/models/doc2vec_inner.pyx":149 * row2 = target_index * size * f = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< @@ -2720,7 +2710,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume __pyx_L8_bool_binop_done:; if (__pyx_t_3) { - /* "gensim/models/doc2vec_inner.pyx":156 + /* "gensim/models/doc2vec_inner.pyx":150 * f = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: * continue # <<<<<<<<<<<<<< @@ -2729,7 +2719,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume */ goto __pyx_L3_continue; - /* "gensim/models/doc2vec_inner.pyx":155 + /* "gensim/models/doc2vec_inner.pyx":149 * row2 = target_index * size * f = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< @@ -2738,7 +2728,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume */ } - /* "gensim/models/doc2vec_inner.pyx":157 + /* "gensim/models/doc2vec_inner.pyx":151 * if f <= -MAX_EXP or f >= MAX_EXP: * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< @@ -2747,7 +2737,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume */ __pyx_v_f = (__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[((int)((__pyx_v_f + 6.0) * 83.0))]); - /* "gensim/models/doc2vec_inner.pyx":158 + /* 
"gensim/models/doc2vec_inner.pyx":152 * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (label - f) * alpha # <<<<<<<<<<<<<< @@ -2756,7 +2746,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume */ __pyx_v_g = ((__pyx_v_label - __pyx_v_f) * __pyx_v_alpha); - /* "gensim/models/doc2vec_inner.pyx":159 + /* "gensim/models/doc2vec_inner.pyx":153 * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (label - f) * alpha * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< @@ -2765,7 +2755,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_work, (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE)); - /* "gensim/models/doc2vec_inner.pyx":160 + /* "gensim/models/doc2vec_inner.pyx":154 * g = (label - f) * alpha * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) * if learn_hidden: # <<<<<<<<<<<<<< @@ -2775,7 +2765,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume __pyx_t_3 = (__pyx_v_learn_hidden != 0); if (__pyx_t_3) { - /* "gensim/models/doc2vec_inner.pyx":161 + /* "gensim/models/doc2vec_inner.pyx":155 * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) * if learn_hidden: * our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< @@ -2784,7 +2774,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), __pyx_v_neu1, (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE)); - /* "gensim/models/doc2vec_inner.pyx":160 + /* "gensim/models/doc2vec_inner.pyx":154 * g = (label - f) * alpha * our_saxpy(&size, &g, &syn1neg[row2], 
&ONE, work, &ONE) * if learn_hidden: # <<<<<<<<<<<<<< @@ -2795,7 +2785,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume __pyx_L3_continue:; } - /* "gensim/models/doc2vec_inner.pyx":163 + /* "gensim/models/doc2vec_inner.pyx":157 * our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) * * return next_random # <<<<<<<<<<<<<< @@ -2805,7 +2795,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume __pyx_r = __pyx_v_next_random; goto __pyx_L0; - /* "gensim/models/doc2vec_inner.pyx":129 + /* "gensim/models/doc2vec_inner.pyx":123 * * * cdef unsigned long long fast_document_dm_neg( # <<<<<<<<<<<<<< @@ -2818,7 +2808,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume return __pyx_r; } -/* "gensim/models/doc2vec_inner.pyx":165 +/* "gensim/models/doc2vec_inner.pyx":159 * return next_random * * cdef void fast_document_dmc_hs( # <<<<<<<<<<<<<< @@ -2836,7 +2826,7 @@ static void __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dmc_hs(__pyx_t int __pyx_t_3; int __pyx_t_4; - /* "gensim/models/doc2vec_inner.pyx":177 + /* "gensim/models/doc2vec_inner.pyx":171 * # l1 already composed by caller, passed in as neu1 * # work accumulates net l1 error; eventually applied by caller * for b in range(word_code_len): # <<<<<<<<<<<<<< @@ -2847,7 +2837,7 @@ static void __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dmc_hs(__pyx_t for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_b = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":178 + /* "gensim/models/doc2vec_inner.pyx":172 * # work accumulates net l1 error; eventually applied by caller * for b in range(word_code_len): * row2 = word_point[b] * layer1_size # <<<<<<<<<<<<<< @@ -2856,7 +2846,7 @@ static void __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dmc_hs(__pyx_t */ __pyx_v_row2 = ((__pyx_v_word_point[__pyx_v_b]) * __pyx_v_layer1_size); - /* "gensim/models/doc2vec_inner.pyx":179 + /* 
"gensim/models/doc2vec_inner.pyx":173 * for b in range(word_code_len): * row2 = word_point[b] * layer1_size * f = our_dot(&layer1_size, neu1, &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< @@ -2865,7 +2855,7 @@ static void __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dmc_hs(__pyx_t */ __pyx_v_f = __pyx_v_6gensim_6models_14word2vec_inner_our_dot((&__pyx_v_layer1_size), __pyx_v_neu1, (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE)); - /* "gensim/models/doc2vec_inner.pyx":180 + /* "gensim/models/doc2vec_inner.pyx":174 * row2 = word_point[b] * layer1_size * f = our_dot(&layer1_size, neu1, &ONE, &syn1[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< @@ -2883,7 +2873,7 @@ static void __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dmc_hs(__pyx_t __pyx_L6_bool_binop_done:; if (__pyx_t_3) { - /* "gensim/models/doc2vec_inner.pyx":181 + /* "gensim/models/doc2vec_inner.pyx":175 * f = our_dot(&layer1_size, neu1, &ONE, &syn1[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: * continue # <<<<<<<<<<<<<< @@ -2892,7 +2882,7 @@ static void __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dmc_hs(__pyx_t */ goto __pyx_L3_continue; - /* "gensim/models/doc2vec_inner.pyx":180 + /* "gensim/models/doc2vec_inner.pyx":174 * row2 = word_point[b] * layer1_size * f = our_dot(&layer1_size, neu1, &ONE, &syn1[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< @@ -2901,7 +2891,7 @@ static void __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dmc_hs(__pyx_t */ } - /* "gensim/models/doc2vec_inner.pyx":182 + /* "gensim/models/doc2vec_inner.pyx":176 * if f <= -MAX_EXP or f >= MAX_EXP: * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< @@ -2910,7 +2900,7 @@ static void __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dmc_hs(__pyx_t */ __pyx_v_f = (__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[((int)((__pyx_v_f + 6.0) * 
83.0))]); - /* "gensim/models/doc2vec_inner.pyx":183 + /* "gensim/models/doc2vec_inner.pyx":177 * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (1 - word_code[b] - f) * alpha # <<<<<<<<<<<<<< @@ -2919,7 +2909,7 @@ static void __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dmc_hs(__pyx_t */ __pyx_v_g = (((1 - (__pyx_v_word_code[__pyx_v_b])) - __pyx_v_f) * __pyx_v_alpha); - /* "gensim/models/doc2vec_inner.pyx":184 + /* "gensim/models/doc2vec_inner.pyx":178 * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (1 - word_code[b] - f) * alpha * our_saxpy(&layer1_size, &g, &syn1[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< @@ -2928,7 +2918,7 @@ static void __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dmc_hs(__pyx_t */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_layer1_size), (&__pyx_v_g), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_work, (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE)); - /* "gensim/models/doc2vec_inner.pyx":185 + /* "gensim/models/doc2vec_inner.pyx":179 * g = (1 - word_code[b] - f) * alpha * our_saxpy(&layer1_size, &g, &syn1[row2], &ONE, work, &ONE) * if learn_hidden: # <<<<<<<<<<<<<< @@ -2938,7 +2928,7 @@ static void __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dmc_hs(__pyx_t __pyx_t_3 = (__pyx_v_learn_hidden != 0); if (__pyx_t_3) { - /* "gensim/models/doc2vec_inner.pyx":186 + /* "gensim/models/doc2vec_inner.pyx":180 * our_saxpy(&layer1_size, &g, &syn1[row2], &ONE, work, &ONE) * if learn_hidden: * our_saxpy(&layer1_size, &g, neu1, &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< @@ -2947,7 +2937,7 @@ static void __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dmc_hs(__pyx_t */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_layer1_size), (&__pyx_v_g), __pyx_v_neu1, (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE)); - /* 
"gensim/models/doc2vec_inner.pyx":185 + /* "gensim/models/doc2vec_inner.pyx":179 * g = (1 - word_code[b] - f) * alpha * our_saxpy(&layer1_size, &g, &syn1[row2], &ONE, work, &ONE) * if learn_hidden: # <<<<<<<<<<<<<< @@ -2958,7 +2948,7 @@ static void __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dmc_hs(__pyx_t __pyx_L3_continue:; } - /* "gensim/models/doc2vec_inner.pyx":165 + /* "gensim/models/doc2vec_inner.pyx":159 * return next_random * * cdef void fast_document_dmc_hs( # <<<<<<<<<<<<<< @@ -2969,7 +2959,7 @@ static void __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dmc_hs(__pyx_t /* function exit code */ } -/* "gensim/models/doc2vec_inner.pyx":189 +/* "gensim/models/doc2vec_inner.pyx":183 * * * cdef unsigned long long fast_document_dmc_neg( # <<<<<<<<<<<<<< @@ -2991,7 +2981,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume int __pyx_t_3; int __pyx_t_4; - /* "gensim/models/doc2vec_inner.pyx":196 + /* "gensim/models/doc2vec_inner.pyx":190 * cdef long long a * cdef long long row2 * cdef unsigned long long modulo = 281474976710655ULL # <<<<<<<<<<<<<< @@ -3000,7 +2990,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume */ __pyx_v_modulo = 281474976710655ULL; - /* "gensim/models/doc2vec_inner.pyx":203 + /* "gensim/models/doc2vec_inner.pyx":197 * # l1 already composed by caller, passed in as neu1 * # work accumulates net l1 error; eventually applied by caller * for d in range(negative+1): # <<<<<<<<<<<<<< @@ -3011,7 +3001,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_d = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":204 + /* "gensim/models/doc2vec_inner.pyx":198 * # work accumulates net l1 error; eventually applied by caller * for d in range(negative+1): * if d == 0: # <<<<<<<<<<<<<< @@ -3021,7 +3011,7 @@ static unsigned PY_LONG_LONG 
__pyx_f_6gensim_6models_13doc2vec_inner_fast_docume __pyx_t_3 = ((__pyx_v_d == 0) != 0); if (__pyx_t_3) { - /* "gensim/models/doc2vec_inner.pyx":205 + /* "gensim/models/doc2vec_inner.pyx":199 * for d in range(negative+1): * if d == 0: * target_index = predict_word_index # <<<<<<<<<<<<<< @@ -3030,7 +3020,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume */ __pyx_v_target_index = __pyx_v_predict_word_index; - /* "gensim/models/doc2vec_inner.pyx":206 + /* "gensim/models/doc2vec_inner.pyx":200 * if d == 0: * target_index = predict_word_index * label = ONEF # <<<<<<<<<<<<<< @@ -3039,7 +3029,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume */ __pyx_v_label = __pyx_v_6gensim_6models_13doc2vec_inner_ONEF; - /* "gensim/models/doc2vec_inner.pyx":204 + /* "gensim/models/doc2vec_inner.pyx":198 * # work accumulates net l1 error; eventually applied by caller * for d in range(negative+1): * if d == 0: # <<<<<<<<<<<<<< @@ -3049,7 +3039,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume goto __pyx_L5; } - /* "gensim/models/doc2vec_inner.pyx":208 + /* "gensim/models/doc2vec_inner.pyx":202 * label = ONEF * else: * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) # <<<<<<<<<<<<<< @@ -3059,7 +3049,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume /*else*/ { __pyx_v_target_index = __pyx_f_6gensim_6models_14word2vec_inner_bisect_left(__pyx_v_cum_table, ((__pyx_v_next_random >> 16) % (__pyx_v_cum_table[(__pyx_v_cum_table_len - 1)])), 0, __pyx_v_cum_table_len); - /* "gensim/models/doc2vec_inner.pyx":209 + /* "gensim/models/doc2vec_inner.pyx":203 * else: * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) * next_random = (next_random * 25214903917ULL + 11) & modulo # <<<<<<<<<<<<<< @@ -3068,7 +3058,7 @@ static unsigned PY_LONG_LONG 
__pyx_f_6gensim_6models_13doc2vec_inner_fast_docume */ __pyx_v_next_random = (((__pyx_v_next_random * ((unsigned PY_LONG_LONG)25214903917ULL)) + 11) & __pyx_v_modulo); - /* "gensim/models/doc2vec_inner.pyx":210 + /* "gensim/models/doc2vec_inner.pyx":204 * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) * next_random = (next_random * 25214903917ULL + 11) & modulo * if target_index == predict_word_index: # <<<<<<<<<<<<<< @@ -3078,7 +3068,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume __pyx_t_3 = ((__pyx_v_target_index == __pyx_v_predict_word_index) != 0); if (__pyx_t_3) { - /* "gensim/models/doc2vec_inner.pyx":211 + /* "gensim/models/doc2vec_inner.pyx":205 * next_random = (next_random * 25214903917ULL + 11) & modulo * if target_index == predict_word_index: * continue # <<<<<<<<<<<<<< @@ -3087,7 +3077,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume */ goto __pyx_L3_continue; - /* "gensim/models/doc2vec_inner.pyx":210 + /* "gensim/models/doc2vec_inner.pyx":204 * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) * next_random = (next_random * 25214903917ULL + 11) & modulo * if target_index == predict_word_index: # <<<<<<<<<<<<<< @@ -3096,7 +3086,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume */ } - /* "gensim/models/doc2vec_inner.pyx":212 + /* "gensim/models/doc2vec_inner.pyx":206 * if target_index == predict_word_index: * continue * label = 0.0 # <<<<<<<<<<<<<< @@ -3107,7 +3097,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume } __pyx_L5:; - /* "gensim/models/doc2vec_inner.pyx":214 + /* "gensim/models/doc2vec_inner.pyx":208 * label = 0.0 * * row2 = target_index * layer1_size # <<<<<<<<<<<<<< @@ -3116,7 +3106,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume */ __pyx_v_row2 
= (__pyx_v_target_index * __pyx_v_layer1_size); - /* "gensim/models/doc2vec_inner.pyx":215 + /* "gensim/models/doc2vec_inner.pyx":209 * * row2 = target_index * layer1_size * f = our_dot(&layer1_size, neu1, &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< @@ -3125,7 +3115,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume */ __pyx_v_f = __pyx_v_6gensim_6models_14word2vec_inner_our_dot((&__pyx_v_layer1_size), __pyx_v_neu1, (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE)); - /* "gensim/models/doc2vec_inner.pyx":216 + /* "gensim/models/doc2vec_inner.pyx":210 * row2 = target_index * layer1_size * f = our_dot(&layer1_size, neu1, &ONE, &syn1neg[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< @@ -3143,7 +3133,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume __pyx_L8_bool_binop_done:; if (__pyx_t_3) { - /* "gensim/models/doc2vec_inner.pyx":217 + /* "gensim/models/doc2vec_inner.pyx":211 * f = our_dot(&layer1_size, neu1, &ONE, &syn1neg[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: * continue # <<<<<<<<<<<<<< @@ -3152,7 +3142,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume */ goto __pyx_L3_continue; - /* "gensim/models/doc2vec_inner.pyx":216 + /* "gensim/models/doc2vec_inner.pyx":210 * row2 = target_index * layer1_size * f = our_dot(&layer1_size, neu1, &ONE, &syn1neg[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< @@ -3161,7 +3151,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume */ } - /* "gensim/models/doc2vec_inner.pyx":218 + /* "gensim/models/doc2vec_inner.pyx":212 * if f <= -MAX_EXP or f >= MAX_EXP: * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< @@ -3170,7 +3160,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume */ __pyx_v_f = 
(__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[((int)((__pyx_v_f + 6.0) * 83.0))]); - /* "gensim/models/doc2vec_inner.pyx":219 + /* "gensim/models/doc2vec_inner.pyx":213 * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (label - f) * alpha # <<<<<<<<<<<<<< @@ -3179,7 +3169,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume */ __pyx_v_g = ((__pyx_v_label - __pyx_v_f) * __pyx_v_alpha); - /* "gensim/models/doc2vec_inner.pyx":220 + /* "gensim/models/doc2vec_inner.pyx":214 * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (label - f) * alpha * our_saxpy(&layer1_size, &g, &syn1neg[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< @@ -3188,7 +3178,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_layer1_size), (&__pyx_v_g), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_work, (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE)); - /* "gensim/models/doc2vec_inner.pyx":221 + /* "gensim/models/doc2vec_inner.pyx":215 * g = (label - f) * alpha * our_saxpy(&layer1_size, &g, &syn1neg[row2], &ONE, work, &ONE) * if learn_hidden: # <<<<<<<<<<<<<< @@ -3198,7 +3188,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume __pyx_t_3 = (__pyx_v_learn_hidden != 0); if (__pyx_t_3) { - /* "gensim/models/doc2vec_inner.pyx":222 + /* "gensim/models/doc2vec_inner.pyx":216 * our_saxpy(&layer1_size, &g, &syn1neg[row2], &ONE, work, &ONE) * if learn_hidden: * our_saxpy(&layer1_size, &g, neu1, &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< @@ -3207,7 +3197,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_layer1_size), (&__pyx_v_g), __pyx_v_neu1, (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), 
(&__pyx_v_6gensim_6models_13doc2vec_inner_ONE)); - /* "gensim/models/doc2vec_inner.pyx":221 + /* "gensim/models/doc2vec_inner.pyx":215 * g = (label - f) * alpha * our_saxpy(&layer1_size, &g, &syn1neg[row2], &ONE, work, &ONE) * if learn_hidden: # <<<<<<<<<<<<<< @@ -3218,7 +3208,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume __pyx_L3_continue:; } - /* "gensim/models/doc2vec_inner.pyx":224 + /* "gensim/models/doc2vec_inner.pyx":218 * our_saxpy(&layer1_size, &g, neu1, &ONE, &syn1neg[row2], &ONE) * * return next_random # <<<<<<<<<<<<<< @@ -3228,7 +3218,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume __pyx_r = __pyx_v_next_random; goto __pyx_L0; - /* "gensim/models/doc2vec_inner.pyx":189 + /* "gensim/models/doc2vec_inner.pyx":183 * * * cdef unsigned long long fast_document_dmc_neg( # <<<<<<<<<<<<<< @@ -3241,7 +3231,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume return __pyx_r; } -/* "gensim/models/doc2vec_inner.pyx":227 +/* "gensim/models/doc2vec_inner.pyx":221 * * * def train_document_dbow(model, doc_words, doctag_indexes, alpha, work=None, # <<<<<<<<<<<<<< @@ -3251,7 +3241,8 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume /* Python wrapper */ static PyObject *__pyx_pw_6gensim_6models_13doc2vec_inner_1train_document_dbow(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static PyMethodDef __pyx_mdef_6gensim_6models_13doc2vec_inner_1train_document_dbow = {"train_document_dbow", (PyCFunction)__pyx_pw_6gensim_6models_13doc2vec_inner_1train_document_dbow, METH_VARARGS|METH_KEYWORDS, 0}; +static char __pyx_doc_6gensim_6models_13doc2vec_inner_train_document_dbow[] = "train_document_dbow(model, doc_words, doctag_indexes, alpha, work=None, train_words=False, learn_doctags=True, learn_words=True, learn_hidden=True, word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None)\nUpdate 
distributed bag of words model (\"PV-DBOW\") by training on a single document.\n\n Called internally from :meth:`~gensim.models.doc2vec.Doc2Vec.train` and\n :meth:`~gensim.models.doc2vec.Doc2Vec.infer_vector`.\n\n Parameters\n ----------\n model : :class:`~gensim.models.doc2vec.Doc2Vec`\n The model to train.\n doc_words : list of str\n The input document as a list of words to be used for training. Each word will be looked up in\n the model's vocabulary.\n doctag_indexes : list of int\n Indices into `doctag_vectors` used to obtain the tags of the document.\n alpha : float\n Learning rate.\n work : list of float, optional\n Updates to be performed on each neuron in the hidden layer of the underlying network.\n train_words : bool, optional\n Word vectors will be updated exactly as per Word2Vec skip-gram training only if **both** `learn_words`\n and `train_words` are set to True.\n learn_doctags : bool, optional\n Whether the tag vectors should be updated.\n learn_words : bool, optional\n Word vectors will be updated exactly as per Word2Vec skip-gram training only if **both**\n `learn_words` and `train_words` are set to True.\n learn_hidden : bool, optional\n Whether or not the weights of the hidden layer will be updated.\n word_vectors : numpy.ndarray, optional\n The vector representation for each word in the vocabulary. If None, these will be retrieved from the model.\n word_locks : numpy.ndarray, optional\n A learning lock factor for each weight in the hidden layer for words, value 0 completely blocks updates,\n a value of 1 allows to update word-vectors.\n doctag_vectors : numpy.ndarray, ""optional\n Vector representations of the tags. 
If None, these will be retrieved from the model.\n doctag_locks : numpy.ndarray, optional\n The lock factors for each tag, same as `word_locks`, but for document-vectors.\n\n Returns\n -------\n int\n Number of words in the input document that were actually used for training.\n\n "; +static PyMethodDef __pyx_mdef_6gensim_6models_13doc2vec_inner_1train_document_dbow = {"train_document_dbow", (PyCFunction)__pyx_pw_6gensim_6models_13doc2vec_inner_1train_document_dbow, METH_VARARGS|METH_KEYWORDS, __pyx_doc_6gensim_6models_13doc2vec_inner_train_document_dbow}; static PyObject *__pyx_pw_6gensim_6models_13doc2vec_inner_1train_document_dbow(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { PyObject *__pyx_v_model = 0; PyObject *__pyx_v_doc_words = 0; @@ -3274,24 +3265,24 @@ static PyObject *__pyx_pw_6gensim_6models_13doc2vec_inner_1train_document_dbow(P PyObject* values[13] = {0,0,0,0,0,0,0,0,0,0,0,0,0}; values[4] = ((PyObject *)Py_None); - /* "gensim/models/doc2vec_inner.pyx":228 + /* "gensim/models/doc2vec_inner.pyx":222 * * def train_document_dbow(model, doc_words, doctag_indexes, alpha, work=None, * train_words=False, learn_doctags=True, learn_words=True, learn_hidden=True, # <<<<<<<<<<<<<< * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): - * cdef int hs = model.hs + * """Update distributed bag of words model ("PV-DBOW") by training on a single document. 
*/ values[5] = ((PyObject *)Py_False); values[6] = ((PyObject *)Py_True); values[7] = ((PyObject *)Py_True); values[8] = ((PyObject *)Py_True); - /* "gensim/models/doc2vec_inner.pyx":229 + /* "gensim/models/doc2vec_inner.pyx":223 * def train_document_dbow(model, doc_words, doctag_indexes, alpha, work=None, * train_words=False, learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): # <<<<<<<<<<<<<< - * cdef int hs = model.hs - * cdef int negative = model.negative + * """Update distributed bag of words model ("PV-DBOW") by training on a single document. + * */ values[9] = ((PyObject *)Py_None); values[10] = ((PyObject *)Py_None); @@ -3339,19 +3330,19 @@ static PyObject *__pyx_pw_6gensim_6models_13doc2vec_inner_1train_document_dbow(P case 1: if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_doc_words)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_document_dbow", 0, 4, 13, 1); __PYX_ERR(0, 227, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("train_document_dbow", 0, 4, 13, 1); __PYX_ERR(0, 221, __pyx_L3_error) } CYTHON_FALLTHROUGH; case 2: if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_doctag_indexes)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_document_dbow", 0, 4, 13, 2); __PYX_ERR(0, 227, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("train_document_dbow", 0, 4, 13, 2); __PYX_ERR(0, 221, __pyx_L3_error) } CYTHON_FALLTHROUGH; case 3: if (likely((values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_alpha)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_document_dbow", 0, 4, 13, 3); __PYX_ERR(0, 227, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("train_document_dbow", 0, 4, 13, 3); __PYX_ERR(0, 221, __pyx_L3_error) } CYTHON_FALLTHROUGH; case 4: @@ -3409,7 +3400,7 @@ static PyObject *__pyx_pw_6gensim_6models_13doc2vec_inner_1train_document_dbow(P } } if (unlikely(kw_args > 0)) { - if 
(unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_document_dbow") < 0)) __PYX_ERR(0, 227, __pyx_L3_error) + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_document_dbow") < 0)) __PYX_ERR(0, 221, __pyx_L3_error) } } else { switch (PyTuple_GET_SIZE(__pyx_args)) { @@ -3455,7 +3446,7 @@ static PyObject *__pyx_pw_6gensim_6models_13doc2vec_inner_1train_document_dbow(P } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("train_document_dbow", 0, 4, 13, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 227, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("train_document_dbow", 0, 4, 13, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 221, __pyx_L3_error) __pyx_L3_error:; __Pyx_AddTraceback("gensim.models.doc2vec_inner.train_document_dbow", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); @@ -3463,7 +3454,7 @@ static PyObject *__pyx_pw_6gensim_6models_13doc2vec_inner_1train_document_dbow(P __pyx_L4_argument_unpacking_done:; __pyx_r = __pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(__pyx_self, __pyx_v_model, __pyx_v_doc_words, __pyx_v_doctag_indexes, __pyx_v_alpha, __pyx_v_work, __pyx_v_train_words, __pyx_v_learn_doctags, __pyx_v_learn_words, __pyx_v_learn_hidden, __pyx_v_word_vectors, __pyx_v_word_locks, __pyx_v_doctag_vectors, __pyx_v_doctag_locks); - /* "gensim/models/doc2vec_inner.pyx":227 + /* "gensim/models/doc2vec_inner.pyx":221 * * * def train_document_dbow(model, doc_words, doctag_indexes, alpha, work=None, # <<<<<<<<<<<<<< @@ -3541,130 +3532,130 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY __Pyx_INCREF(__pyx_v_doctag_vectors); __Pyx_INCREF(__pyx_v_doctag_locks); - /* "gensim/models/doc2vec_inner.pyx":230 - * train_words=False, learn_doctags=True, learn_words=True, learn_hidden=True, - * word_vectors=None, word_locks=None, doctag_vectors=None, 
doctag_locks=None): + /* "gensim/models/doc2vec_inner.pyx":268 + * + * """ * cdef int hs = model.hs # <<<<<<<<<<<<<< * cdef int negative = model.negative * cdef int sample = (model.vocabulary.sample != 0) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 230, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 268, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 230, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 268, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_hs = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":231 - * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): + /* "gensim/models/doc2vec_inner.pyx":269 + * """ * cdef int hs = model.hs * cdef int negative = model.negative # <<<<<<<<<<<<<< * cdef int sample = (model.vocabulary.sample != 0) * cdef int _train_words = train_words */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 231, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 269, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 231, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 269, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_negative = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":232 + /* "gensim/models/doc2vec_inner.pyx":270 * cdef int hs = model.hs * cdef int negative = model.negative * cdef int sample = 
(model.vocabulary.sample != 0) # <<<<<<<<<<<<<< * cdef int _train_words = train_words * cdef int _learn_words = learn_words */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocabulary); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 232, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocabulary); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 270, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_sample); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 232, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_sample); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 270, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyObject_RichCompare(__pyx_t_3, __pyx_int_0, Py_NE); __Pyx_XGOTREF(__pyx_t_1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 232, __pyx_L1_error) + __pyx_t_1 = PyObject_RichCompare(__pyx_t_3, __pyx_int_0, Py_NE); __Pyx_XGOTREF(__pyx_t_1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 270, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 232, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 270, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_sample = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":233 + /* "gensim/models/doc2vec_inner.pyx":271 * cdef int negative = model.negative * cdef int sample = (model.vocabulary.sample != 0) * cdef int _train_words = train_words # <<<<<<<<<<<<<< * cdef int _learn_words = learn_words * cdef int _learn_hidden = learn_hidden */ - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_train_words); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 233, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_train_words); if (unlikely((__pyx_t_2 == (int)-1) && 
PyErr_Occurred())) __PYX_ERR(0, 271, __pyx_L1_error) __pyx_v__train_words = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":234 + /* "gensim/models/doc2vec_inner.pyx":272 * cdef int sample = (model.vocabulary.sample != 0) * cdef int _train_words = train_words * cdef int _learn_words = learn_words # <<<<<<<<<<<<<< * cdef int _learn_hidden = learn_hidden * cdef int _learn_doctags = learn_doctags */ - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_words); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 234, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_words); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 272, __pyx_L1_error) __pyx_v__learn_words = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":235 + /* "gensim/models/doc2vec_inner.pyx":273 * cdef int _train_words = train_words * cdef int _learn_words = learn_words * cdef int _learn_hidden = learn_hidden # <<<<<<<<<<<<<< * cdef int _learn_doctags = learn_doctags * */ - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_hidden); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 235, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_hidden); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 273, __pyx_L1_error) __pyx_v__learn_hidden = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":236 + /* "gensim/models/doc2vec_inner.pyx":274 * cdef int _learn_words = learn_words * cdef int _learn_hidden = learn_hidden * cdef int _learn_doctags = learn_doctags # <<<<<<<<<<<<<< * * cdef REAL_t *_word_vectors */ - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_doctags); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 236, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_doctags); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 274, __pyx_L1_error) __pyx_v__learn_doctags = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":243 + /* 
"gensim/models/doc2vec_inner.pyx":281 * cdef REAL_t *_doctag_locks * cdef REAL_t *_work * cdef REAL_t _alpha = alpha # <<<<<<<<<<<<<< * cdef int size = model.trainables.layer1_size * */ - __pyx_t_4 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_4 == ((npy_float32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 243, __pyx_L1_error) + __pyx_t_4 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_4 == ((npy_float32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 281, __pyx_L1_error) __pyx_v__alpha = __pyx_t_4; - /* "gensim/models/doc2vec_inner.pyx":244 + /* "gensim/models/doc2vec_inner.pyx":282 * cdef REAL_t *_work * cdef REAL_t _alpha = alpha * cdef int size = model.trainables.layer1_size # <<<<<<<<<<<<<< * * cdef int codelens[MAX_DOCUMENT_LEN] */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 244, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 282, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 244, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 282, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 244, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 282, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_size = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":252 + /* "gensim/models/doc2vec_inner.pyx":290 * cdef int document_len * cdef int doctag_len * cdef int window = model.window # <<<<<<<<<<<<<< * * cdef int i, j */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, 
__pyx_n_s_window); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 252, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 290, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 252, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 290, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_window = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":256 + /* "gensim/models/doc2vec_inner.pyx":294 * cdef int i, j * cdef unsigned long long r * cdef long result = 0 # <<<<<<<<<<<<<< @@ -3673,7 +3664,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY */ __pyx_v_result = 0; - /* "gensim/models/doc2vec_inner.pyx":270 + /* "gensim/models/doc2vec_inner.pyx":308 * * # default vectors, locks from syn0/doctag_syn0 * if word_vectors is None: # <<<<<<<<<<<<<< @@ -3684,22 +3675,22 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY __pyx_t_6 = (__pyx_t_5 != 0); if (__pyx_t_6) { - /* "gensim/models/doc2vec_inner.pyx":271 + /* "gensim/models/doc2vec_inner.pyx":309 * # default vectors, locks from syn0/doctag_syn0 * if word_vectors is None: * word_vectors = model.wv.vectors # <<<<<<<<<<<<<< * _word_vectors = (np.PyArray_DATA(word_vectors)) * if doctag_vectors is None: */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 271, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 309, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_vectors); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 271, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_vectors); if 
(unlikely(!__pyx_t_1)) __PYX_ERR(0, 309, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_DECREF_SET(__pyx_v_word_vectors, __pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/doc2vec_inner.pyx":270 + /* "gensim/models/doc2vec_inner.pyx":308 * * # default vectors, locks from syn0/doctag_syn0 * if word_vectors is None: # <<<<<<<<<<<<<< @@ -3708,17 +3699,17 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY */ } - /* "gensim/models/doc2vec_inner.pyx":272 + /* "gensim/models/doc2vec_inner.pyx":310 * if word_vectors is None: * word_vectors = model.wv.vectors * _word_vectors = (np.PyArray_DATA(word_vectors)) # <<<<<<<<<<<<<< * if doctag_vectors is None: * doctag_vectors = model.docvecs.vectors_docs */ - if (!(likely(((__pyx_v_word_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_vectors, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 272, __pyx_L1_error) + if (!(likely(((__pyx_v_word_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_vectors, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 310, __pyx_L1_error) __pyx_v__word_vectors = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_word_vectors))); - /* "gensim/models/doc2vec_inner.pyx":273 + /* "gensim/models/doc2vec_inner.pyx":311 * word_vectors = model.wv.vectors * _word_vectors = (np.PyArray_DATA(word_vectors)) * if doctag_vectors is None: # <<<<<<<<<<<<<< @@ -3729,22 +3720,22 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY __pyx_t_5 = (__pyx_t_6 != 0); if (__pyx_t_5) { - /* "gensim/models/doc2vec_inner.pyx":274 + /* "gensim/models/doc2vec_inner.pyx":312 * _word_vectors = (np.PyArray_DATA(word_vectors)) * if doctag_vectors is None: * doctag_vectors = model.docvecs.vectors_docs # <<<<<<<<<<<<<< * _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) * if word_locks is None: */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_docvecs); if 
(unlikely(!__pyx_t_1)) __PYX_ERR(0, 274, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_docvecs); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 312, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_vectors_docs); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 274, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_vectors_docs); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 312, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF_SET(__pyx_v_doctag_vectors, __pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/doc2vec_inner.pyx":273 + /* "gensim/models/doc2vec_inner.pyx":311 * word_vectors = model.wv.vectors * _word_vectors = (np.PyArray_DATA(word_vectors)) * if doctag_vectors is None: # <<<<<<<<<<<<<< @@ -3753,17 +3744,17 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY */ } - /* "gensim/models/doc2vec_inner.pyx":275 + /* "gensim/models/doc2vec_inner.pyx":313 * if doctag_vectors is None: * doctag_vectors = model.docvecs.vectors_docs * _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) # <<<<<<<<<<<<<< * if word_locks is None: * word_locks = model.trainables.vectors_lockf */ - if (!(likely(((__pyx_v_doctag_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doctag_vectors, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 275, __pyx_L1_error) + if (!(likely(((__pyx_v_doctag_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doctag_vectors, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 313, __pyx_L1_error) __pyx_v__doctag_vectors = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_doctag_vectors))); - /* "gensim/models/doc2vec_inner.pyx":276 + /* "gensim/models/doc2vec_inner.pyx":314 * doctag_vectors = model.docvecs.vectors_docs * _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) * if word_locks is None: # <<<<<<<<<<<<<< @@ -3774,22 +3765,22 @@ static PyObject 
*__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY __pyx_t_6 = (__pyx_t_5 != 0); if (__pyx_t_6) { - /* "gensim/models/doc2vec_inner.pyx":277 + /* "gensim/models/doc2vec_inner.pyx":315 * _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) * if word_locks is None: * word_locks = model.trainables.vectors_lockf # <<<<<<<<<<<<<< * _word_locks = (np.PyArray_DATA(word_locks)) * if doctag_locks is None: */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 277, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 315, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_vectors_lockf); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 277, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_vectors_lockf); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 315, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_DECREF_SET(__pyx_v_word_locks, __pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/doc2vec_inner.pyx":276 + /* "gensim/models/doc2vec_inner.pyx":314 * doctag_vectors = model.docvecs.vectors_docs * _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) * if word_locks is None: # <<<<<<<<<<<<<< @@ -3798,17 +3789,17 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY */ } - /* "gensim/models/doc2vec_inner.pyx":278 + /* "gensim/models/doc2vec_inner.pyx":316 * if word_locks is None: * word_locks = model.trainables.vectors_lockf * _word_locks = (np.PyArray_DATA(word_locks)) # <<<<<<<<<<<<<< * if doctag_locks is None: * doctag_locks = model.trainables.vectors_docs_lockf */ - if (!(likely(((__pyx_v_word_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_locks, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 278, __pyx_L1_error) + if (!(likely(((__pyx_v_word_locks) == Py_None) || 
likely(__Pyx_TypeTest(__pyx_v_word_locks, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 316, __pyx_L1_error) __pyx_v__word_locks = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_word_locks))); - /* "gensim/models/doc2vec_inner.pyx":279 + /* "gensim/models/doc2vec_inner.pyx":317 * word_locks = model.trainables.vectors_lockf * _word_locks = (np.PyArray_DATA(word_locks)) * if doctag_locks is None: # <<<<<<<<<<<<<< @@ -3819,22 +3810,22 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY __pyx_t_5 = (__pyx_t_6 != 0); if (__pyx_t_5) { - /* "gensim/models/doc2vec_inner.pyx":280 + /* "gensim/models/doc2vec_inner.pyx":318 * _word_locks = (np.PyArray_DATA(word_locks)) * if doctag_locks is None: * doctag_locks = model.trainables.vectors_docs_lockf # <<<<<<<<<<<<<< * _doctag_locks = (np.PyArray_DATA(doctag_locks)) * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 280, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 318, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_vectors_docs_lockf); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 280, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_vectors_docs_lockf); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 318, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF_SET(__pyx_v_doctag_locks, __pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/doc2vec_inner.pyx":279 + /* "gensim/models/doc2vec_inner.pyx":317 * word_locks = model.trainables.vectors_lockf * _word_locks = (np.PyArray_DATA(word_locks)) * if doctag_locks is None: # <<<<<<<<<<<<<< @@ -3843,17 +3834,17 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY */ } - /* "gensim/models/doc2vec_inner.pyx":281 + /* 
"gensim/models/doc2vec_inner.pyx":319 * if doctag_locks is None: * doctag_locks = model.trainables.vectors_docs_lockf * _doctag_locks = (np.PyArray_DATA(doctag_locks)) # <<<<<<<<<<<<<< * * if hs: */ - if (!(likely(((__pyx_v_doctag_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doctag_locks, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 281, __pyx_L1_error) + if (!(likely(((__pyx_v_doctag_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doctag_locks, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 319, __pyx_L1_error) __pyx_v__doctag_locks = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_doctag_locks))); - /* "gensim/models/doc2vec_inner.pyx":283 + /* "gensim/models/doc2vec_inner.pyx":321 * _doctag_locks = (np.PyArray_DATA(doctag_locks)) * * if hs: # <<<<<<<<<<<<<< @@ -3863,23 +3854,23 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY __pyx_t_5 = (__pyx_v_hs != 0); if (__pyx_t_5) { - /* "gensim/models/doc2vec_inner.pyx":284 + /* "gensim/models/doc2vec_inner.pyx":322 * * if hs: * syn1 = (np.PyArray_DATA(model.trainables.syn1)) # <<<<<<<<<<<<<< * * if negative: */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 284, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 322, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 284, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 322, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 284, __pyx_L1_error) + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, 
__pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 322, __pyx_L1_error) __pyx_v_syn1 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/doc2vec_inner.pyx":283 + /* "gensim/models/doc2vec_inner.pyx":321 * _doctag_locks = (np.PyArray_DATA(doctag_locks)) * * if hs: # <<<<<<<<<<<<<< @@ -3888,7 +3879,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY */ } - /* "gensim/models/doc2vec_inner.pyx":286 + /* "gensim/models/doc2vec_inner.pyx":324 * syn1 = (np.PyArray_DATA(model.trainables.syn1)) * * if negative: # <<<<<<<<<<<<<< @@ -3898,55 +3889,55 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY __pyx_t_5 = (__pyx_v_negative != 0); if (__pyx_t_5) { - /* "gensim/models/doc2vec_inner.pyx":287 + /* "gensim/models/doc2vec_inner.pyx":325 * * if negative: * syn1neg = (np.PyArray_DATA(model.trainables.syn1neg)) # <<<<<<<<<<<<<< * cum_table = (np.PyArray_DATA(model.vocabulary.cum_table)) * cum_table_len = len(model.vocabulary.cum_table) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 287, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 325, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 287, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 325, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 287, __pyx_L1_error) + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 
325, __pyx_L1_error) __pyx_v_syn1neg = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/doc2vec_inner.pyx":288 + /* "gensim/models/doc2vec_inner.pyx":326 * if negative: * syn1neg = (np.PyArray_DATA(model.trainables.syn1neg)) * cum_table = (np.PyArray_DATA(model.vocabulary.cum_table)) # <<<<<<<<<<<<<< * cum_table_len = len(model.vocabulary.cum_table) * if negative or sample: */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocabulary); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 288, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocabulary); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 326, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 288, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 326, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 288, __pyx_L1_error) + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 326, __pyx_L1_error) __pyx_v_cum_table = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/doc2vec_inner.pyx":289 + /* "gensim/models/doc2vec_inner.pyx":327 * syn1neg = (np.PyArray_DATA(model.trainables.syn1neg)) * cum_table = (np.PyArray_DATA(model.vocabulary.cum_table)) * cum_table_len = len(model.vocabulary.cum_table) # <<<<<<<<<<<<<< * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, 
__pyx_n_s_vocabulary); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 289, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocabulary); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 327, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 289, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 327, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_7 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_7 == ((Py_ssize_t)-1))) __PYX_ERR(0, 289, __pyx_L1_error) + __pyx_t_7 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_7 == ((Py_ssize_t)-1))) __PYX_ERR(0, 327, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_cum_table_len = __pyx_t_7; - /* "gensim/models/doc2vec_inner.pyx":286 + /* "gensim/models/doc2vec_inner.pyx":324 * syn1 = (np.PyArray_DATA(model.trainables.syn1)) * * if negative: # <<<<<<<<<<<<<< @@ -3955,7 +3946,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY */ } - /* "gensim/models/doc2vec_inner.pyx":290 + /* "gensim/models/doc2vec_inner.pyx":328 * cum_table = (np.PyArray_DATA(model.vocabulary.cum_table)) * cum_table_len = len(model.vocabulary.cum_table) * if negative or sample: # <<<<<<<<<<<<<< @@ -3973,41 +3964,41 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY __pyx_L10_bool_binop_done:; if (__pyx_t_5) { - /* "gensim/models/doc2vec_inner.pyx":291 + /* "gensim/models/doc2vec_inner.pyx":329 * cum_table_len = len(model.vocabulary.cum_table) * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if 
(unlikely(!__pyx_t_3)) __PYX_ERR(0, 291, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 329, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 291, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 329, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple_, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 291, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple_, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 329, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 291, __pyx_L1_error) + __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 329, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 291, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 329, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 291, __pyx_L1_error) + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 329, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_tuple__2, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 291, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_tuple__2, NULL); if 
(unlikely(!__pyx_t_3)) __PYX_ERR(0, 329, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __pyx_t_8 = PyNumber_Add(__pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 291, __pyx_L1_error) + __pyx_t_8 = PyNumber_Add(__pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 329, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_9 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_8); if (unlikely((__pyx_t_9 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) __PYX_ERR(0, 291, __pyx_L1_error) + __pyx_t_9 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_8); if (unlikely((__pyx_t_9 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) __PYX_ERR(0, 329, __pyx_L1_error) __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; __pyx_v_next_random = __pyx_t_9; - /* "gensim/models/doc2vec_inner.pyx":290 + /* "gensim/models/doc2vec_inner.pyx":328 * cum_table = (np.PyArray_DATA(model.vocabulary.cum_table)) * cum_table_len = len(model.vocabulary.cum_table) * if negative or sample: # <<<<<<<<<<<<<< @@ -4016,7 +4007,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY */ } - /* "gensim/models/doc2vec_inner.pyx":294 + /* "gensim/models/doc2vec_inner.pyx":332 * * # convert Python structures to primitive types, so we can release the GIL * if work is None: # <<<<<<<<<<<<<< @@ -4027,32 +4018,32 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY __pyx_t_6 = (__pyx_t_5 != 0); if (__pyx_t_6) { - /* "gensim/models/doc2vec_inner.pyx":295 + /* "gensim/models/doc2vec_inner.pyx":333 * # convert Python structures to primitive types, so we can release the GIL * if work is None: * work = zeros(model.trainables.layer1_size, dtype=REAL) # <<<<<<<<<<<<<< * _work = np.PyArray_DATA(work) * */ - __pyx_t_8 = __Pyx_GetModuleGlobalName(__pyx_n_s_zeros); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 295, __pyx_L1_error) + __pyx_t_8 = 
__Pyx_GetModuleGlobalName(__pyx_n_s_zeros); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 333, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 295, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 333, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 295, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 333, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 295, __pyx_L1_error) + __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 333, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_GIVEREF(__pyx_t_1); PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyDict_NewPresized(1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 295, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyDict_NewPresized(1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 333, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_10 = __Pyx_GetModuleGlobalName(__pyx_n_s_REAL); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 295, __pyx_L1_error) + __pyx_t_10 = __Pyx_GetModuleGlobalName(__pyx_n_s_REAL); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 333, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_10); - if (PyDict_SetItem(__pyx_t_1, __pyx_n_s_dtype, __pyx_t_10) < 0) __PYX_ERR(0, 295, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_s_dtype, __pyx_t_10) < 0) __PYX_ERR(0, 333, __pyx_L1_error) __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - __pyx_t_10 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_t_3, __pyx_t_1); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 295, __pyx_L1_error) + __pyx_t_10 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_t_3, __pyx_t_1); if 
(unlikely(!__pyx_t_10)) __PYX_ERR(0, 333, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_10); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; @@ -4060,7 +4051,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY __Pyx_DECREF_SET(__pyx_v_work, __pyx_t_10); __pyx_t_10 = 0; - /* "gensim/models/doc2vec_inner.pyx":294 + /* "gensim/models/doc2vec_inner.pyx":332 * * # convert Python structures to primitive types, so we can release the GIL * if work is None: # <<<<<<<<<<<<<< @@ -4069,32 +4060,32 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY */ } - /* "gensim/models/doc2vec_inner.pyx":296 + /* "gensim/models/doc2vec_inner.pyx":334 * if work is None: * work = zeros(model.trainables.layer1_size, dtype=REAL) * _work = np.PyArray_DATA(work) # <<<<<<<<<<<<<< * * vlookup = model.wv.vocab */ - if (!(likely(((__pyx_v_work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_work, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 296, __pyx_L1_error) + if (!(likely(((__pyx_v_work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_work, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 334, __pyx_L1_error) __pyx_v__work = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_work))); - /* "gensim/models/doc2vec_inner.pyx":298 + /* "gensim/models/doc2vec_inner.pyx":336 * _work = np.PyArray_DATA(work) * * vlookup = model.wv.vocab # <<<<<<<<<<<<<< * i = 0 * for token in doc_words: */ - __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 298, __pyx_L1_error) + __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 336, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_10); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_10, __pyx_n_s_vocab); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 298, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_10, __pyx_n_s_vocab); if 
(unlikely(!__pyx_t_1)) __PYX_ERR(0, 336, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; __pyx_v_vlookup = __pyx_t_1; __pyx_t_1 = 0; - /* "gensim/models/doc2vec_inner.pyx":299 + /* "gensim/models/doc2vec_inner.pyx":337 * * vlookup = model.wv.vocab * i = 0 # <<<<<<<<<<<<<< @@ -4103,7 +4094,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY */ __pyx_v_i = 0; - /* "gensim/models/doc2vec_inner.pyx":300 + /* "gensim/models/doc2vec_inner.pyx":338 * vlookup = model.wv.vocab * i = 0 * for token in doc_words: # <<<<<<<<<<<<<< @@ -4114,26 +4105,26 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY __pyx_t_1 = __pyx_v_doc_words; __Pyx_INCREF(__pyx_t_1); __pyx_t_7 = 0; __pyx_t_11 = NULL; } else { - __pyx_t_7 = -1; __pyx_t_1 = PyObject_GetIter(__pyx_v_doc_words); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 300, __pyx_L1_error) + __pyx_t_7 = -1; __pyx_t_1 = PyObject_GetIter(__pyx_v_doc_words); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 338, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_11 = Py_TYPE(__pyx_t_1)->tp_iternext; if (unlikely(!__pyx_t_11)) __PYX_ERR(0, 300, __pyx_L1_error) + __pyx_t_11 = Py_TYPE(__pyx_t_1)->tp_iternext; if (unlikely(!__pyx_t_11)) __PYX_ERR(0, 338, __pyx_L1_error) } for (;;) { if (likely(!__pyx_t_11)) { if (likely(PyList_CheckExact(__pyx_t_1))) { if (__pyx_t_7 >= PyList_GET_SIZE(__pyx_t_1)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_10 = PyList_GET_ITEM(__pyx_t_1, __pyx_t_7); __Pyx_INCREF(__pyx_t_10); __pyx_t_7++; if (unlikely(0 < 0)) __PYX_ERR(0, 300, __pyx_L1_error) + __pyx_t_10 = PyList_GET_ITEM(__pyx_t_1, __pyx_t_7); __Pyx_INCREF(__pyx_t_10); __pyx_t_7++; if (unlikely(0 < 0)) __PYX_ERR(0, 338, __pyx_L1_error) #else - __pyx_t_10 = PySequence_ITEM(__pyx_t_1, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 300, __pyx_L1_error) + __pyx_t_10 = PySequence_ITEM(__pyx_t_1, __pyx_t_7); __pyx_t_7++; if 
(unlikely(!__pyx_t_10)) __PYX_ERR(0, 338, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_10); #endif } else { if (__pyx_t_7 >= PyTuple_GET_SIZE(__pyx_t_1)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_10 = PyTuple_GET_ITEM(__pyx_t_1, __pyx_t_7); __Pyx_INCREF(__pyx_t_10); __pyx_t_7++; if (unlikely(0 < 0)) __PYX_ERR(0, 300, __pyx_L1_error) + __pyx_t_10 = PyTuple_GET_ITEM(__pyx_t_1, __pyx_t_7); __Pyx_INCREF(__pyx_t_10); __pyx_t_7++; if (unlikely(0 < 0)) __PYX_ERR(0, 338, __pyx_L1_error) #else - __pyx_t_10 = PySequence_ITEM(__pyx_t_1, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 300, __pyx_L1_error) + __pyx_t_10 = PySequence_ITEM(__pyx_t_1, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 338, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_10); #endif } @@ -4143,7 +4134,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 300, __pyx_L1_error) + else __PYX_ERR(0, 338, __pyx_L1_error) } break; } @@ -4152,16 +4143,16 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY __Pyx_XDECREF_SET(__pyx_v_token, __pyx_t_10); __pyx_t_10 = 0; - /* "gensim/models/doc2vec_inner.pyx":301 + /* "gensim/models/doc2vec_inner.pyx":339 * i = 0 * for token in doc_words: * predict_word = vlookup[token] if token in vlookup else None # <<<<<<<<<<<<<< * if predict_word is None: # shrink document to leave out word * continue # leaving i unchanged */ - __pyx_t_6 = (__Pyx_PySequence_ContainsTF(__pyx_v_token, __pyx_v_vlookup, Py_EQ)); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(0, 301, __pyx_L1_error) + __pyx_t_6 = (__Pyx_PySequence_ContainsTF(__pyx_v_token, __pyx_v_vlookup, Py_EQ)); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(0, 339, __pyx_L1_error) if ((__pyx_t_6 != 0)) { - __pyx_t_3 = PyObject_GetItem(__pyx_v_vlookup, __pyx_v_token); 
if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 301, __pyx_L1_error) + __pyx_t_3 = PyObject_GetItem(__pyx_v_vlookup, __pyx_v_token); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 339, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_t_10 = __pyx_t_3; __pyx_t_3 = 0; @@ -4172,7 +4163,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY __Pyx_XDECREF_SET(__pyx_v_predict_word, __pyx_t_10); __pyx_t_10 = 0; - /* "gensim/models/doc2vec_inner.pyx":302 + /* "gensim/models/doc2vec_inner.pyx":340 * for token in doc_words: * predict_word = vlookup[token] if token in vlookup else None * if predict_word is None: # shrink document to leave out word # <<<<<<<<<<<<<< @@ -4183,7 +4174,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY __pyx_t_5 = (__pyx_t_6 != 0); if (__pyx_t_5) { - /* "gensim/models/doc2vec_inner.pyx":303 + /* "gensim/models/doc2vec_inner.pyx":341 * predict_word = vlookup[token] if token in vlookup else None * if predict_word is None: # shrink document to leave out word * continue # leaving i unchanged # <<<<<<<<<<<<<< @@ -4192,7 +4183,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY */ goto __pyx_L13_continue; - /* "gensim/models/doc2vec_inner.pyx":302 + /* "gensim/models/doc2vec_inner.pyx":340 * for token in doc_words: * predict_word = vlookup[token] if token in vlookup else None * if predict_word is None: # shrink document to leave out word # <<<<<<<<<<<<<< @@ -4201,7 +4192,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY */ } - /* "gensim/models/doc2vec_inner.pyx":304 + /* "gensim/models/doc2vec_inner.pyx":342 * if predict_word is None: # shrink document to leave out word * continue # leaving i unchanged * if sample and predict_word.sample_int < random_int32(&next_random): # <<<<<<<<<<<<<< @@ -4214,20 +4205,20 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY __pyx_t_5 = __pyx_t_6; goto 
__pyx_L17_bool_binop_done; } - __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_sample_int); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 304, __pyx_L1_error) + __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_sample_int); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 342, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_10); - __pyx_t_3 = __Pyx_PyInt_From_unsigned_PY_LONG_LONG(__pyx_f_6gensim_6models_14word2vec_inner_random_int32((&__pyx_v_next_random))); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 304, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyInt_From_unsigned_PY_LONG_LONG(__pyx_f_6gensim_6models_14word2vec_inner_random_int32((&__pyx_v_next_random))); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 342, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_8 = PyObject_RichCompare(__pyx_t_10, __pyx_t_3, Py_LT); __Pyx_XGOTREF(__pyx_t_8); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 304, __pyx_L1_error) + __pyx_t_8 = PyObject_RichCompare(__pyx_t_10, __pyx_t_3, Py_LT); __Pyx_XGOTREF(__pyx_t_8); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 342, __pyx_L1_error) __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_8); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(0, 304, __pyx_L1_error) + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_8); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(0, 342, __pyx_L1_error) __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; __pyx_t_5 = __pyx_t_6; __pyx_L17_bool_binop_done:; if (__pyx_t_5) { - /* "gensim/models/doc2vec_inner.pyx":305 + /* "gensim/models/doc2vec_inner.pyx":343 * continue # leaving i unchanged * if sample and predict_word.sample_int < random_int32(&next_random): * continue # <<<<<<<<<<<<<< @@ -4236,7 +4227,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY */ goto __pyx_L13_continue; - /* "gensim/models/doc2vec_inner.pyx":304 + /* "gensim/models/doc2vec_inner.pyx":342 * if predict_word is None: # shrink document to leave out word * continue # leaving i 
unchanged * if sample and predict_word.sample_int < random_int32(&next_random): # <<<<<<<<<<<<<< @@ -4245,20 +4236,20 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY */ } - /* "gensim/models/doc2vec_inner.pyx":306 + /* "gensim/models/doc2vec_inner.pyx":344 * if sample and predict_word.sample_int < random_int32(&next_random): * continue * indexes[i] = predict_word.index # <<<<<<<<<<<<<< * if hs: * codelens[i] = len(predict_word.code) */ - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_index); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 306, __pyx_L1_error) + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_index); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 344, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); - __pyx_t_12 = __Pyx_PyInt_As_npy_uint32(__pyx_t_8); if (unlikely((__pyx_t_12 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 306, __pyx_L1_error) + __pyx_t_12 = __Pyx_PyInt_As_npy_uint32(__pyx_t_8); if (unlikely((__pyx_t_12 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 344, __pyx_L1_error) __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; (__pyx_v_indexes[__pyx_v_i]) = __pyx_t_12; - /* "gensim/models/doc2vec_inner.pyx":307 + /* "gensim/models/doc2vec_inner.pyx":345 * continue * indexes[i] = predict_word.index * if hs: # <<<<<<<<<<<<<< @@ -4268,46 +4259,46 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY __pyx_t_5 = (__pyx_v_hs != 0); if (__pyx_t_5) { - /* "gensim/models/doc2vec_inner.pyx":308 + /* "gensim/models/doc2vec_inner.pyx":346 * indexes[i] = predict_word.index * if hs: * codelens[i] = len(predict_word.code) # <<<<<<<<<<<<<< * codes[i] = np.PyArray_DATA(predict_word.code) * points[i] = np.PyArray_DATA(predict_word.point) */ - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_code); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 308, __pyx_L1_error) + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_code); if 
(unlikely(!__pyx_t_8)) __PYX_ERR(0, 346, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); - __pyx_t_13 = PyObject_Length(__pyx_t_8); if (unlikely(__pyx_t_13 == ((Py_ssize_t)-1))) __PYX_ERR(0, 308, __pyx_L1_error) + __pyx_t_13 = PyObject_Length(__pyx_t_8); if (unlikely(__pyx_t_13 == ((Py_ssize_t)-1))) __PYX_ERR(0, 346, __pyx_L1_error) __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; (__pyx_v_codelens[__pyx_v_i]) = ((int)__pyx_t_13); - /* "gensim/models/doc2vec_inner.pyx":309 + /* "gensim/models/doc2vec_inner.pyx":347 * if hs: * codelens[i] = len(predict_word.code) * codes[i] = np.PyArray_DATA(predict_word.code) # <<<<<<<<<<<<<< * points[i] = np.PyArray_DATA(predict_word.point) * result += 1 */ - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_code); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 309, __pyx_L1_error) + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_code); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 347, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); - if (!(likely(((__pyx_t_8) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_8, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 309, __pyx_L1_error) + if (!(likely(((__pyx_t_8) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_8, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 347, __pyx_L1_error) (__pyx_v_codes[__pyx_v_i]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_8))); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - /* "gensim/models/doc2vec_inner.pyx":310 + /* "gensim/models/doc2vec_inner.pyx":348 * codelens[i] = len(predict_word.code) * codes[i] = np.PyArray_DATA(predict_word.code) * points[i] = np.PyArray_DATA(predict_word.point) # <<<<<<<<<<<<<< * result += 1 * i += 1 */ - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_point); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 310, __pyx_L1_error) + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_point); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 348, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); - if 
(!(likely(((__pyx_t_8) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_8, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 310, __pyx_L1_error) + if (!(likely(((__pyx_t_8) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_8, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 348, __pyx_L1_error) (__pyx_v_points[__pyx_v_i]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_8))); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - /* "gensim/models/doc2vec_inner.pyx":307 + /* "gensim/models/doc2vec_inner.pyx":345 * continue * indexes[i] = predict_word.index * if hs: # <<<<<<<<<<<<<< @@ -4316,7 +4307,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY */ } - /* "gensim/models/doc2vec_inner.pyx":311 + /* "gensim/models/doc2vec_inner.pyx":349 * codes[i] = np.PyArray_DATA(predict_word.code) * points[i] = np.PyArray_DATA(predict_word.point) * result += 1 # <<<<<<<<<<<<<< @@ -4325,7 +4316,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY */ __pyx_v_result = (__pyx_v_result + 1); - /* "gensim/models/doc2vec_inner.pyx":312 + /* "gensim/models/doc2vec_inner.pyx":350 * points[i] = np.PyArray_DATA(predict_word.point) * result += 1 * i += 1 # <<<<<<<<<<<<<< @@ -4334,7 +4325,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY */ __pyx_v_i = (__pyx_v_i + 1); - /* "gensim/models/doc2vec_inner.pyx":313 + /* "gensim/models/doc2vec_inner.pyx":351 * result += 1 * i += 1 * if i == MAX_DOCUMENT_LEN: # <<<<<<<<<<<<<< @@ -4344,7 +4335,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY __pyx_t_5 = ((__pyx_v_i == 0x2710) != 0); if (__pyx_t_5) { - /* "gensim/models/doc2vec_inner.pyx":314 + /* "gensim/models/doc2vec_inner.pyx":352 * i += 1 * if i == MAX_DOCUMENT_LEN: * break # TODO: log warning, tally overflow? 
# <<<<<<<<<<<<<< @@ -4353,7 +4344,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY */ goto __pyx_L14_break; - /* "gensim/models/doc2vec_inner.pyx":313 + /* "gensim/models/doc2vec_inner.pyx":351 * result += 1 * i += 1 * if i == MAX_DOCUMENT_LEN: # <<<<<<<<<<<<<< @@ -4362,7 +4353,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY */ } - /* "gensim/models/doc2vec_inner.pyx":300 + /* "gensim/models/doc2vec_inner.pyx":338 * vlookup = model.wv.vocab * i = 0 * for token in doc_words: # <<<<<<<<<<<<<< @@ -4374,7 +4365,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY __pyx_L14_break:; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/doc2vec_inner.pyx":315 + /* "gensim/models/doc2vec_inner.pyx":353 * if i == MAX_DOCUMENT_LEN: * break # TODO: log warning, tally overflow? * document_len = i # <<<<<<<<<<<<<< @@ -4383,7 +4374,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY */ __pyx_v_document_len = __pyx_v_i; - /* "gensim/models/doc2vec_inner.pyx":317 + /* "gensim/models/doc2vec_inner.pyx":355 * document_len = i * * if _train_words: # <<<<<<<<<<<<<< @@ -4393,7 +4384,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY __pyx_t_5 = (__pyx_v__train_words != 0); if (__pyx_t_5) { - /* "gensim/models/doc2vec_inner.pyx":319 + /* "gensim/models/doc2vec_inner.pyx":357 * if _train_words: * # single randint() call avoids a big thread-synchronization slowdown * for i, item in enumerate(model.random.randint(0, window, document_len)): # <<<<<<<<<<<<<< @@ -4401,14 +4392,14 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY * */ __pyx_t_2 = 0; - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 319, __pyx_L1_error) + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_8)) 
__PYX_ERR(0, 357, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_randint); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 319, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_randint); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 357, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __pyx_t_8 = __Pyx_PyInt_From_int(__pyx_v_window); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 319, __pyx_L1_error) + __pyx_t_8 = __Pyx_PyInt_From_int(__pyx_v_window); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 357, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); - __pyx_t_10 = __Pyx_PyInt_From_int(__pyx_v_document_len); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 319, __pyx_L1_error) + __pyx_t_10 = __Pyx_PyInt_From_int(__pyx_v_document_len); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 357, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_10); __pyx_t_14 = NULL; __pyx_t_15 = 0; @@ -4425,7 +4416,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY #if CYTHON_FAST_PYCALL if (PyFunction_Check(__pyx_t_3)) { PyObject *__pyx_temp[4] = {__pyx_t_14, __pyx_int_0, __pyx_t_8, __pyx_t_10}; - __pyx_t_1 = __Pyx_PyFunction_FastCall(__pyx_t_3, __pyx_temp+1-__pyx_t_15, 3+__pyx_t_15); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 319, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyFunction_FastCall(__pyx_t_3, __pyx_temp+1-__pyx_t_15, 3+__pyx_t_15); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 357, __pyx_L1_error) __Pyx_XDECREF(__pyx_t_14); __pyx_t_14 = 0; __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; @@ -4435,7 +4426,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY #if CYTHON_FAST_PYCCALL if (__Pyx_PyFastCFunction_Check(__pyx_t_3)) { PyObject *__pyx_temp[4] = {__pyx_t_14, __pyx_int_0, __pyx_t_8, __pyx_t_10}; - __pyx_t_1 = __Pyx_PyCFunction_FastCall(__pyx_t_3, __pyx_temp+1-__pyx_t_15, 3+__pyx_t_15); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 319, __pyx_L1_error) + __pyx_t_1 = 
__Pyx_PyCFunction_FastCall(__pyx_t_3, __pyx_temp+1-__pyx_t_15, 3+__pyx_t_15); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 357, __pyx_L1_error) __Pyx_XDECREF(__pyx_t_14); __pyx_t_14 = 0; __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; @@ -4443,7 +4434,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY } else #endif { - __pyx_t_16 = PyTuple_New(3+__pyx_t_15); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 319, __pyx_L1_error) + __pyx_t_16 = PyTuple_New(3+__pyx_t_15); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 357, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); if (__pyx_t_14) { __Pyx_GIVEREF(__pyx_t_14); PyTuple_SET_ITEM(__pyx_t_16, 0, __pyx_t_14); __pyx_t_14 = NULL; @@ -4457,7 +4448,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY PyTuple_SET_ITEM(__pyx_t_16, 2+__pyx_t_15, __pyx_t_10); __pyx_t_8 = 0; __pyx_t_10 = 0; - __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_3, __pyx_t_16, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 319, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_3, __pyx_t_16, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 357, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; } @@ -4466,9 +4457,9 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY __pyx_t_3 = __pyx_t_1; __Pyx_INCREF(__pyx_t_3); __pyx_t_7 = 0; __pyx_t_11 = NULL; } else { - __pyx_t_7 = -1; __pyx_t_3 = PyObject_GetIter(__pyx_t_1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 319, __pyx_L1_error) + __pyx_t_7 = -1; __pyx_t_3 = PyObject_GetIter(__pyx_t_1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 357, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_11 = Py_TYPE(__pyx_t_3)->tp_iternext; if (unlikely(!__pyx_t_11)) __PYX_ERR(0, 319, __pyx_L1_error) + __pyx_t_11 = Py_TYPE(__pyx_t_3)->tp_iternext; if (unlikely(!__pyx_t_11)) __PYX_ERR(0, 357, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; for (;;) { @@ -4476,17 +4467,17 @@ static PyObject 
*__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY if (likely(PyList_CheckExact(__pyx_t_3))) { if (__pyx_t_7 >= PyList_GET_SIZE(__pyx_t_3)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_1 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_7); __Pyx_INCREF(__pyx_t_1); __pyx_t_7++; if (unlikely(0 < 0)) __PYX_ERR(0, 319, __pyx_L1_error) + __pyx_t_1 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_7); __Pyx_INCREF(__pyx_t_1); __pyx_t_7++; if (unlikely(0 < 0)) __PYX_ERR(0, 357, __pyx_L1_error) #else - __pyx_t_1 = PySequence_ITEM(__pyx_t_3, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 319, __pyx_L1_error) + __pyx_t_1 = PySequence_ITEM(__pyx_t_3, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 357, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); #endif } else { if (__pyx_t_7 >= PyTuple_GET_SIZE(__pyx_t_3)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_7); __Pyx_INCREF(__pyx_t_1); __pyx_t_7++; if (unlikely(0 < 0)) __PYX_ERR(0, 319, __pyx_L1_error) + __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_7); __Pyx_INCREF(__pyx_t_1); __pyx_t_7++; if (unlikely(0 < 0)) __PYX_ERR(0, 357, __pyx_L1_error) #else - __pyx_t_1 = PySequence_ITEM(__pyx_t_3, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 319, __pyx_L1_error) + __pyx_t_1 = PySequence_ITEM(__pyx_t_3, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 357, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); #endif } @@ -4496,7 +4487,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 319, __pyx_L1_error) + else __PYX_ERR(0, 357, __pyx_L1_error) } break; } @@ -4507,17 +4498,17 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY __pyx_v_i = __pyx_t_2; __pyx_t_2 = 
(__pyx_t_2 + 1); - /* "gensim/models/doc2vec_inner.pyx":320 + /* "gensim/models/doc2vec_inner.pyx":358 * # single randint() call avoids a big thread-synchronization slowdown * for i, item in enumerate(model.random.randint(0, window, document_len)): * reduced_windows[i] = item # <<<<<<<<<<<<<< * * doctag_len = min(MAX_DOCUMENT_LEN, len(doctag_indexes)) */ - __pyx_t_12 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_12 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 320, __pyx_L1_error) + __pyx_t_12 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_12 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 358, __pyx_L1_error) (__pyx_v_reduced_windows[__pyx_v_i]) = __pyx_t_12; - /* "gensim/models/doc2vec_inner.pyx":319 + /* "gensim/models/doc2vec_inner.pyx":357 * if _train_words: * # single randint() call avoids a big thread-synchronization slowdown * for i, item in enumerate(model.random.randint(0, window, document_len)): # <<<<<<<<<<<<<< @@ -4527,7 +4518,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY } __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/doc2vec_inner.pyx":317 + /* "gensim/models/doc2vec_inner.pyx":355 * document_len = i * * if _train_words: # <<<<<<<<<<<<<< @@ -4536,14 +4527,14 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY */ } - /* "gensim/models/doc2vec_inner.pyx":322 + /* "gensim/models/doc2vec_inner.pyx":360 * reduced_windows[i] = item * * doctag_len = min(MAX_DOCUMENT_LEN, len(doctag_indexes)) # <<<<<<<<<<<<<< * for i in range(doctag_len): * _doctag_indexes[i] = doctag_indexes[i] */ - __pyx_t_7 = PyObject_Length(__pyx_v_doctag_indexes); if (unlikely(__pyx_t_7 == ((Py_ssize_t)-1))) __PYX_ERR(0, 322, __pyx_L1_error) + __pyx_t_7 = PyObject_Length(__pyx_v_doctag_indexes); if (unlikely(__pyx_t_7 == ((Py_ssize_t)-1))) __PYX_ERR(0, 360, __pyx_L1_error) __pyx_t_17 = 0x2710; if (((__pyx_t_7 < __pyx_t_17) != 0)) { __pyx_t_13 = 
__pyx_t_7; @@ -4552,7 +4543,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY } __pyx_v_doctag_len = ((int)__pyx_t_13); - /* "gensim/models/doc2vec_inner.pyx":323 + /* "gensim/models/doc2vec_inner.pyx":361 * * doctag_len = min(MAX_DOCUMENT_LEN, len(doctag_indexes)) * for i in range(doctag_len): # <<<<<<<<<<<<<< @@ -4563,20 +4554,20 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY for (__pyx_t_15 = 0; __pyx_t_15 < __pyx_t_2; __pyx_t_15+=1) { __pyx_v_i = __pyx_t_15; - /* "gensim/models/doc2vec_inner.pyx":324 + /* "gensim/models/doc2vec_inner.pyx":362 * doctag_len = min(MAX_DOCUMENT_LEN, len(doctag_indexes)) * for i in range(doctag_len): * _doctag_indexes[i] = doctag_indexes[i] # <<<<<<<<<<<<<< * result += 1 * */ - __pyx_t_3 = __Pyx_GetItemInt(__pyx_v_doctag_indexes, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 324, __pyx_L1_error) + __pyx_t_3 = __Pyx_GetItemInt(__pyx_v_doctag_indexes, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 362, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_12 = __Pyx_PyInt_As_npy_uint32(__pyx_t_3); if (unlikely((__pyx_t_12 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 324, __pyx_L1_error) + __pyx_t_12 = __Pyx_PyInt_As_npy_uint32(__pyx_t_3); if (unlikely((__pyx_t_12 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 362, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; (__pyx_v__doctag_indexes[__pyx_v_i]) = __pyx_t_12; - /* "gensim/models/doc2vec_inner.pyx":325 + /* "gensim/models/doc2vec_inner.pyx":363 * for i in range(doctag_len): * _doctag_indexes[i] = doctag_indexes[i] * result += 1 # <<<<<<<<<<<<<< @@ -4586,7 +4577,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY __pyx_v_result = (__pyx_v_result + 1); } - /* "gensim/models/doc2vec_inner.pyx":328 + /* "gensim/models/doc2vec_inner.pyx":366 * * # release GIL & train on the 
document * with nogil: # <<<<<<<<<<<<<< @@ -4601,7 +4592,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY #endif /*try:*/ { - /* "gensim/models/doc2vec_inner.pyx":329 + /* "gensim/models/doc2vec_inner.pyx":367 * # release GIL & train on the document * with nogil: * for i in range(document_len): # <<<<<<<<<<<<<< @@ -4612,7 +4603,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY for (__pyx_t_15 = 0; __pyx_t_15 < __pyx_t_2; __pyx_t_15+=1) { __pyx_v_i = __pyx_t_15; - /* "gensim/models/doc2vec_inner.pyx":330 + /* "gensim/models/doc2vec_inner.pyx":368 * with nogil: * for i in range(document_len): * if _train_words: # simultaneous skip-gram wordvec-training # <<<<<<<<<<<<<< @@ -4622,7 +4613,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY __pyx_t_5 = (__pyx_v__train_words != 0); if (__pyx_t_5) { - /* "gensim/models/doc2vec_inner.pyx":331 + /* "gensim/models/doc2vec_inner.pyx":369 * for i in range(document_len): * if _train_words: # simultaneous skip-gram wordvec-training * j = i - window + reduced_windows[i] # <<<<<<<<<<<<<< @@ -4631,7 +4622,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY */ __pyx_v_j = ((__pyx_v_i - __pyx_v_window) + (__pyx_v_reduced_windows[__pyx_v_i])); - /* "gensim/models/doc2vec_inner.pyx":332 + /* "gensim/models/doc2vec_inner.pyx":370 * if _train_words: # simultaneous skip-gram wordvec-training * j = i - window + reduced_windows[i] * if j < 0: # <<<<<<<<<<<<<< @@ -4641,7 +4632,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY __pyx_t_5 = ((__pyx_v_j < 0) != 0); if (__pyx_t_5) { - /* "gensim/models/doc2vec_inner.pyx":333 + /* "gensim/models/doc2vec_inner.pyx":371 * j = i - window + reduced_windows[i] * if j < 0: * j = 0 # <<<<<<<<<<<<<< @@ -4650,7 +4641,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY */ __pyx_v_j = 0; - /* 
"gensim/models/doc2vec_inner.pyx":332 + /* "gensim/models/doc2vec_inner.pyx":370 * if _train_words: # simultaneous skip-gram wordvec-training * j = i - window + reduced_windows[i] * if j < 0: # <<<<<<<<<<<<<< @@ -4659,7 +4650,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY */ } - /* "gensim/models/doc2vec_inner.pyx":334 + /* "gensim/models/doc2vec_inner.pyx":372 * if j < 0: * j = 0 * k = i + window + 1 - reduced_windows[i] # <<<<<<<<<<<<<< @@ -4668,7 +4659,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY */ __pyx_v_k = (((__pyx_v_i + __pyx_v_window) + 1) - (__pyx_v_reduced_windows[__pyx_v_i])); - /* "gensim/models/doc2vec_inner.pyx":335 + /* "gensim/models/doc2vec_inner.pyx":373 * j = 0 * k = i + window + 1 - reduced_windows[i] * if k > document_len: # <<<<<<<<<<<<<< @@ -4678,7 +4669,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY __pyx_t_5 = ((__pyx_v_k > __pyx_v_document_len) != 0); if (__pyx_t_5) { - /* "gensim/models/doc2vec_inner.pyx":336 + /* "gensim/models/doc2vec_inner.pyx":374 * k = i + window + 1 - reduced_windows[i] * if k > document_len: * k = document_len # <<<<<<<<<<<<<< @@ -4687,7 +4678,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY */ __pyx_v_k = __pyx_v_document_len; - /* "gensim/models/doc2vec_inner.pyx":335 + /* "gensim/models/doc2vec_inner.pyx":373 * j = 0 * k = i + window + 1 - reduced_windows[i] * if k > document_len: # <<<<<<<<<<<<<< @@ -4696,7 +4687,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY */ } - /* "gensim/models/doc2vec_inner.pyx":337 + /* "gensim/models/doc2vec_inner.pyx":375 * if k > document_len: * k = document_len * for j in range(j, k): # <<<<<<<<<<<<<< @@ -4707,7 +4698,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY for (__pyx_t_18 = __pyx_v_j; __pyx_t_18 < __pyx_t_17; __pyx_t_18+=1) { __pyx_v_j = 
__pyx_t_18; - /* "gensim/models/doc2vec_inner.pyx":338 + /* "gensim/models/doc2vec_inner.pyx":376 * k = document_len * for j in range(j, k): * if j == i: # <<<<<<<<<<<<<< @@ -4717,7 +4708,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY __pyx_t_5 = ((__pyx_v_j == __pyx_v_i) != 0); if (__pyx_t_5) { - /* "gensim/models/doc2vec_inner.pyx":339 + /* "gensim/models/doc2vec_inner.pyx":377 * for j in range(j, k): * if j == i: * continue # <<<<<<<<<<<<<< @@ -4726,7 +4717,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY */ goto __pyx_L34_continue; - /* "gensim/models/doc2vec_inner.pyx":338 + /* "gensim/models/doc2vec_inner.pyx":376 * k = document_len * for j in range(j, k): * if j == i: # <<<<<<<<<<<<<< @@ -4735,7 +4726,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY */ } - /* "gensim/models/doc2vec_inner.pyx":340 + /* "gensim/models/doc2vec_inner.pyx":378 * if j == i: * continue * if hs: # <<<<<<<<<<<<<< @@ -4745,7 +4736,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY __pyx_t_5 = (__pyx_v_hs != 0); if (__pyx_t_5) { - /* "gensim/models/doc2vec_inner.pyx":342 + /* "gensim/models/doc2vec_inner.pyx":380 * if hs: * # we reuse the DBOW function, as it is equivalent to skip-gram for this purpose * fast_document_dbow_hs(points[i], codes[i], codelens[i], _word_vectors, syn1, size, indexes[j], # <<<<<<<<<<<<<< @@ -4754,7 +4745,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY */ __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dbow_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), (__pyx_v_codelens[__pyx_v_i]), __pyx_v__word_vectors, __pyx_v_syn1, __pyx_v_size, (__pyx_v_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v__work, __pyx_v__learn_words, __pyx_v__learn_hidden, __pyx_v__word_locks); - /* "gensim/models/doc2vec_inner.pyx":340 + /* "gensim/models/doc2vec_inner.pyx":378 * if j == i: * 
continue * if hs: # <<<<<<<<<<<<<< @@ -4763,7 +4754,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY */ } - /* "gensim/models/doc2vec_inner.pyx":344 + /* "gensim/models/doc2vec_inner.pyx":382 * fast_document_dbow_hs(points[i], codes[i], codelens[i], _word_vectors, syn1, size, indexes[j], * _alpha, _work, _learn_words, _learn_hidden, _word_locks) * if negative: # <<<<<<<<<<<<<< @@ -4773,7 +4764,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY __pyx_t_5 = (__pyx_v_negative != 0); if (__pyx_t_5) { - /* "gensim/models/doc2vec_inner.pyx":346 + /* "gensim/models/doc2vec_inner.pyx":384 * if negative: * # we reuse the DBOW function, as it is equivalent to skip-gram for this purpose * next_random = fast_document_dbow_neg(negative, cum_table, cum_table_len, _word_vectors, syn1neg, size, # <<<<<<<<<<<<<< @@ -4782,7 +4773,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY */ __pyx_v_next_random = __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dbow_neg(__pyx_v_negative, __pyx_v_cum_table, __pyx_v_cum_table_len, __pyx_v__word_vectors, __pyx_v_syn1neg, __pyx_v_size, (__pyx_v_indexes[__pyx_v_i]), (__pyx_v_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v__work, __pyx_v_next_random, __pyx_v__learn_words, __pyx_v__learn_hidden, __pyx_v__word_locks); - /* "gensim/models/doc2vec_inner.pyx":344 + /* "gensim/models/doc2vec_inner.pyx":382 * fast_document_dbow_hs(points[i], codes[i], codelens[i], _word_vectors, syn1, size, indexes[j], * _alpha, _work, _learn_words, _learn_hidden, _word_locks) * if negative: # <<<<<<<<<<<<<< @@ -4793,7 +4784,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY __pyx_L34_continue:; } - /* "gensim/models/doc2vec_inner.pyx":330 + /* "gensim/models/doc2vec_inner.pyx":368 * with nogil: * for i in range(document_len): * if _train_words: # simultaneous skip-gram wordvec-training # <<<<<<<<<<<<<< @@ -4802,7 +4793,7 @@ 
static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY */ } - /* "gensim/models/doc2vec_inner.pyx":351 + /* "gensim/models/doc2vec_inner.pyx":389 * * # docvec-training * for j in range(doctag_len): # <<<<<<<<<<<<<< @@ -4813,7 +4804,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY for (__pyx_t_19 = 0; __pyx_t_19 < __pyx_t_18; __pyx_t_19+=1) { __pyx_v_j = __pyx_t_19; - /* "gensim/models/doc2vec_inner.pyx":352 + /* "gensim/models/doc2vec_inner.pyx":390 * # docvec-training * for j in range(doctag_len): * if hs: # <<<<<<<<<<<<<< @@ -4823,7 +4814,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY __pyx_t_5 = (__pyx_v_hs != 0); if (__pyx_t_5) { - /* "gensim/models/doc2vec_inner.pyx":353 + /* "gensim/models/doc2vec_inner.pyx":391 * for j in range(doctag_len): * if hs: * fast_document_dbow_hs(points[i], codes[i], codelens[i], _doctag_vectors, syn1, size, _doctag_indexes[j], # <<<<<<<<<<<<<< @@ -4832,7 +4823,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY */ __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dbow_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), (__pyx_v_codelens[__pyx_v_i]), __pyx_v__doctag_vectors, __pyx_v_syn1, __pyx_v_size, (__pyx_v__doctag_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v__work, __pyx_v__learn_doctags, __pyx_v__learn_hidden, __pyx_v__doctag_locks); - /* "gensim/models/doc2vec_inner.pyx":352 + /* "gensim/models/doc2vec_inner.pyx":390 * # docvec-training * for j in range(doctag_len): * if hs: # <<<<<<<<<<<<<< @@ -4841,7 +4832,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY */ } - /* "gensim/models/doc2vec_inner.pyx":355 + /* "gensim/models/doc2vec_inner.pyx":393 * fast_document_dbow_hs(points[i], codes[i], codelens[i], _doctag_vectors, syn1, size, _doctag_indexes[j], * _alpha, _work, _learn_doctags, _learn_hidden, _doctag_locks) * if negative: # <<<<<<<<<<<<<< 
@@ -4851,7 +4842,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY __pyx_t_5 = (__pyx_v_negative != 0); if (__pyx_t_5) { - /* "gensim/models/doc2vec_inner.pyx":356 + /* "gensim/models/doc2vec_inner.pyx":394 * _alpha, _work, _learn_doctags, _learn_hidden, _doctag_locks) * if negative: * next_random = fast_document_dbow_neg(negative, cum_table, cum_table_len, _doctag_vectors, syn1neg, size, # <<<<<<<<<<<<<< @@ -4860,7 +4851,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY */ __pyx_v_next_random = __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dbow_neg(__pyx_v_negative, __pyx_v_cum_table, __pyx_v_cum_table_len, __pyx_v__doctag_vectors, __pyx_v_syn1neg, __pyx_v_size, (__pyx_v_indexes[__pyx_v_i]), (__pyx_v__doctag_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v__work, __pyx_v_next_random, __pyx_v__learn_doctags, __pyx_v__learn_hidden, __pyx_v__doctag_locks); - /* "gensim/models/doc2vec_inner.pyx":355 + /* "gensim/models/doc2vec_inner.pyx":393 * fast_document_dbow_hs(points[i], codes[i], codelens[i], _doctag_vectors, syn1, size, _doctag_indexes[j], * _alpha, _work, _learn_doctags, _learn_hidden, _doctag_locks) * if negative: # <<<<<<<<<<<<<< @@ -4872,7 +4863,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY } } - /* "gensim/models/doc2vec_inner.pyx":328 + /* "gensim/models/doc2vec_inner.pyx":366 * * # release GIL & train on the document * with nogil: # <<<<<<<<<<<<<< @@ -4891,7 +4882,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY } } - /* "gensim/models/doc2vec_inner.pyx":360 + /* "gensim/models/doc2vec_inner.pyx":398 * _learn_doctags, _learn_hidden, _doctag_locks) * * return result # <<<<<<<<<<<<<< @@ -4899,13 +4890,13 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY * */ __Pyx_XDECREF(__pyx_r); - __pyx_t_3 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 
360, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 398, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_r = __pyx_t_3; __pyx_t_3 = 0; goto __pyx_L0; - /* "gensim/models/doc2vec_inner.pyx":227 + /* "gensim/models/doc2vec_inner.pyx":221 * * * def train_document_dbow(model, doc_words, doctag_indexes, alpha, work=None, # <<<<<<<<<<<<<< @@ -4938,7 +4929,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY return __pyx_r; } -/* "gensim/models/doc2vec_inner.pyx":363 +/* "gensim/models/doc2vec_inner.pyx":401 * * * def train_document_dm(model, doc_words, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< @@ -4948,7 +4939,8 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY /* Python wrapper */ static PyObject *__pyx_pw_6gensim_6models_13doc2vec_inner_3train_document_dm(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static PyMethodDef __pyx_mdef_6gensim_6models_13doc2vec_inner_3train_document_dm = {"train_document_dm", (PyCFunction)__pyx_pw_6gensim_6models_13doc2vec_inner_3train_document_dm, METH_VARARGS|METH_KEYWORDS, 0}; +static char __pyx_doc_6gensim_6models_13doc2vec_inner_2train_document_dm[] = "train_document_dm(model, doc_words, doctag_indexes, alpha, work=None, neu1=None, learn_doctags=True, learn_words=True, learn_hidden=True, word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None)\nUpdate distributed memory model (\"PV-DM\") by training on a single document.\n This method implements the DM model with a projection (input) layer that is either the sum or mean of the context\n vectors, depending on the model's `dm_mean` configuration field.\n\n Called internally from :meth:`~gensim.models.doc2vec.Doc2Vec.train` and\n :meth:`~gensim.models.doc2vec.Doc2Vec.infer_vector`.\n\n Parameters\n ----------\n model : :class:`~gensim.models.doc2vec.Doc2Vec`\n The model to train.\n doc_words : 
list of str\n The input document as a list of words to be used for training. Each word will be looked up in\n the model's vocabulary.\n doctag_indexes : list of int\n Indices into `doctag_vectors` used to obtain the tags of the document.\n alpha : float\n Learning rate.\n work : np.ndarray, optional\n Private working memory for each worker.\n neu1 : np.ndarray, optional\n Private working memory for each worker.\n learn_doctags : bool, optional\n Whether the tag vectors should be updated.\n learn_words : bool, optional\n Word vectors will be updated exactly as per Word2Vec skip-gram training only if **both**\n `learn_words` and `train_words` are set to True.\n learn_hidden : bool, optional\n Whether or not the weights of the hidden layer will be updated.\n word_vectors : numpy.ndarray, optional\n The vector representation for each word in the vocabulary. If None, these will be retrieved from the model.\n word_locks : numpy.ndarray, optional\n A learning lock factor for each weight in the hidden layer for words, value 0 completely blocks updates,\n a value of 1 allows to update word-vectors.\n doctag_vectors : n""umpy.ndarray, optional\n Vector representations of the tags. 
If None, these will be retrieved from the model.\n doctag_locks : numpy.ndarray, optional\n The lock factors for each tag, same as `word_locks`, but for document-vectors.\n\n Returns\n -------\n int\n Number of words in the input document that were actually used for training.\n\n "; +static PyMethodDef __pyx_mdef_6gensim_6models_13doc2vec_inner_3train_document_dm = {"train_document_dm", (PyCFunction)__pyx_pw_6gensim_6models_13doc2vec_inner_3train_document_dm, METH_VARARGS|METH_KEYWORDS, __pyx_doc_6gensim_6models_13doc2vec_inner_2train_document_dm}; static PyObject *__pyx_pw_6gensim_6models_13doc2vec_inner_3train_document_dm(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { PyObject *__pyx_v_model = 0; PyObject *__pyx_v_doc_words = 0; @@ -4972,23 +4964,23 @@ static PyObject *__pyx_pw_6gensim_6models_13doc2vec_inner_3train_document_dm(PyO values[4] = ((PyObject *)Py_None); values[5] = ((PyObject *)Py_None); - /* "gensim/models/doc2vec_inner.pyx":364 + /* "gensim/models/doc2vec_inner.pyx":402 * * def train_document_dm(model, doc_words, doctag_indexes, alpha, work=None, neu1=None, * learn_doctags=True, learn_words=True, learn_hidden=True, # <<<<<<<<<<<<<< * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): - * cdef int hs = model.hs + * """Update distributed memory model ("PV-DM") by training on a single document. */ values[6] = ((PyObject *)Py_True); values[7] = ((PyObject *)Py_True); values[8] = ((PyObject *)Py_True); - /* "gensim/models/doc2vec_inner.pyx":365 + /* "gensim/models/doc2vec_inner.pyx":403 * def train_document_dm(model, doc_words, doctag_indexes, alpha, work=None, neu1=None, * learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): # <<<<<<<<<<<<<< - * cdef int hs = model.hs - * cdef int negative = model.negative + * """Update distributed memory model ("PV-DM") by training on a single document. 
+ * This method implements the DM model with a projection (input) layer that is either the sum or mean of the context */ values[9] = ((PyObject *)Py_None); values[10] = ((PyObject *)Py_None); @@ -5036,19 +5028,19 @@ static PyObject *__pyx_pw_6gensim_6models_13doc2vec_inner_3train_document_dm(PyO case 1: if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_doc_words)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_document_dm", 0, 4, 13, 1); __PYX_ERR(0, 363, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("train_document_dm", 0, 4, 13, 1); __PYX_ERR(0, 401, __pyx_L3_error) } CYTHON_FALLTHROUGH; case 2: if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_doctag_indexes)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_document_dm", 0, 4, 13, 2); __PYX_ERR(0, 363, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("train_document_dm", 0, 4, 13, 2); __PYX_ERR(0, 401, __pyx_L3_error) } CYTHON_FALLTHROUGH; case 3: if (likely((values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_alpha)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_document_dm", 0, 4, 13, 3); __PYX_ERR(0, 363, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("train_document_dm", 0, 4, 13, 3); __PYX_ERR(0, 401, __pyx_L3_error) } CYTHON_FALLTHROUGH; case 4: @@ -5106,7 +5098,7 @@ static PyObject *__pyx_pw_6gensim_6models_13doc2vec_inner_3train_document_dm(PyO } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_document_dm") < 0)) __PYX_ERR(0, 363, __pyx_L3_error) + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_document_dm") < 0)) __PYX_ERR(0, 401, __pyx_L3_error) } } else { switch (PyTuple_GET_SIZE(__pyx_args)) { @@ -5152,7 +5144,7 @@ static PyObject *__pyx_pw_6gensim_6models_13doc2vec_inner_3train_document_dm(PyO } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("train_document_dm", 0, 
4, 13, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 363, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("train_document_dm", 0, 4, 13, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 401, __pyx_L3_error) __pyx_L3_error:; __Pyx_AddTraceback("gensim.models.doc2vec_inner.train_document_dm", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); @@ -5160,7 +5152,7 @@ static PyObject *__pyx_pw_6gensim_6models_13doc2vec_inner_3train_document_dm(PyO __pyx_L4_argument_unpacking_done:; __pyx_r = __pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(__pyx_self, __pyx_v_model, __pyx_v_doc_words, __pyx_v_doctag_indexes, __pyx_v_alpha, __pyx_v_work, __pyx_v_neu1, __pyx_v_learn_doctags, __pyx_v_learn_words, __pyx_v_learn_hidden, __pyx_v_word_vectors, __pyx_v_word_locks, __pyx_v_doctag_vectors, __pyx_v_doctag_locks); - /* "gensim/models/doc2vec_inner.pyx":363 + /* "gensim/models/doc2vec_inner.pyx":401 * * * def train_document_dm(model, doc_words, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< @@ -5243,94 +5235,94 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT __Pyx_INCREF(__pyx_v_doctag_vectors); __Pyx_INCREF(__pyx_v_doctag_locks); - /* "gensim/models/doc2vec_inner.pyx":366 - * learn_doctags=True, learn_words=True, learn_hidden=True, - * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): + /* "gensim/models/doc2vec_inner.pyx":449 + * + * """ * cdef int hs = model.hs # <<<<<<<<<<<<<< * cdef int negative = model.negative * cdef int sample = (model.vocabulary.sample != 0) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 366, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 449, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 366, 
__pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 449, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_hs = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":367 - * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): + /* "gensim/models/doc2vec_inner.pyx":450 + * """ * cdef int hs = model.hs * cdef int negative = model.negative # <<<<<<<<<<<<<< * cdef int sample = (model.vocabulary.sample != 0) * cdef int _learn_doctags = learn_doctags */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 367, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 450, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 367, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 450, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_negative = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":368 + /* "gensim/models/doc2vec_inner.pyx":451 * cdef int hs = model.hs * cdef int negative = model.negative * cdef int sample = (model.vocabulary.sample != 0) # <<<<<<<<<<<<<< * cdef int _learn_doctags = learn_doctags * cdef int _learn_words = learn_words */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocabulary); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 368, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocabulary); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 451, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_sample); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 368, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, 
__pyx_n_s_sample); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 451, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyObject_RichCompare(__pyx_t_3, __pyx_int_0, Py_NE); __Pyx_XGOTREF(__pyx_t_1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 368, __pyx_L1_error) + __pyx_t_1 = PyObject_RichCompare(__pyx_t_3, __pyx_int_0, Py_NE); __Pyx_XGOTREF(__pyx_t_1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 451, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 368, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 451, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_sample = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":369 + /* "gensim/models/doc2vec_inner.pyx":452 * cdef int negative = model.negative * cdef int sample = (model.vocabulary.sample != 0) * cdef int _learn_doctags = learn_doctags # <<<<<<<<<<<<<< * cdef int _learn_words = learn_words * cdef int _learn_hidden = learn_hidden */ - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_doctags); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 369, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_doctags); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 452, __pyx_L1_error) __pyx_v__learn_doctags = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":370 + /* "gensim/models/doc2vec_inner.pyx":453 * cdef int sample = (model.vocabulary.sample != 0) * cdef int _learn_doctags = learn_doctags * cdef int _learn_words = learn_words # <<<<<<<<<<<<<< * cdef int _learn_hidden = learn_hidden * cdef int cbow_mean = model.cbow_mean */ - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_words); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 370, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_words); 
if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 453, __pyx_L1_error) __pyx_v__learn_words = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":371 + /* "gensim/models/doc2vec_inner.pyx":454 * cdef int _learn_doctags = learn_doctags * cdef int _learn_words = learn_words * cdef int _learn_hidden = learn_hidden # <<<<<<<<<<<<<< * cdef int cbow_mean = model.cbow_mean * cdef REAL_t count, inv_count = 1.0 */ - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_hidden); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 371, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_hidden); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 454, __pyx_L1_error) __pyx_v__learn_hidden = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":372 + /* "gensim/models/doc2vec_inner.pyx":455 * cdef int _learn_words = learn_words * cdef int _learn_hidden = learn_hidden * cdef int cbow_mean = model.cbow_mean # <<<<<<<<<<<<<< * cdef REAL_t count, inv_count = 1.0 * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cbow_mean); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 372, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cbow_mean); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 455, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 372, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 455, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_cbow_mean = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":373 + /* "gensim/models/doc2vec_inner.pyx":456 * cdef int _learn_hidden = learn_hidden * cdef int cbow_mean = model.cbow_mean * cdef REAL_t count, inv_count = 1.0 # <<<<<<<<<<<<<< @@ -5339,46 +5331,46 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ 
__pyx_v_inv_count = 1.0; - /* "gensim/models/doc2vec_inner.pyx":381 + /* "gensim/models/doc2vec_inner.pyx":464 * cdef REAL_t *_work * cdef REAL_t *_neu1 * cdef REAL_t _alpha = alpha # <<<<<<<<<<<<<< * cdef int size = model.trainables.layer1_size * */ - __pyx_t_4 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_4 == ((npy_float32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 381, __pyx_L1_error) + __pyx_t_4 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_4 == ((npy_float32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 464, __pyx_L1_error) __pyx_v__alpha = __pyx_t_4; - /* "gensim/models/doc2vec_inner.pyx":382 + /* "gensim/models/doc2vec_inner.pyx":465 * cdef REAL_t *_neu1 * cdef REAL_t _alpha = alpha * cdef int size = model.trainables.layer1_size # <<<<<<<<<<<<<< * * cdef int codelens[MAX_DOCUMENT_LEN] */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 382, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 465, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 382, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 465, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 382, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 465, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_size = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":390 + /* "gensim/models/doc2vec_inner.pyx":473 * cdef int document_len * cdef int doctag_len * cdef int window = model.window # <<<<<<<<<<<<<< * * cdef 
int i, j, k, m */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 390, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 473, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 390, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 473, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_window = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":393 + /* "gensim/models/doc2vec_inner.pyx":476 * * cdef int i, j, k, m * cdef long result = 0 # <<<<<<<<<<<<<< @@ -5387,7 +5379,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ __pyx_v_result = 0; - /* "gensim/models/doc2vec_inner.pyx":408 + /* "gensim/models/doc2vec_inner.pyx":491 * * # default vectors, locks from syn0/doctag_syn0 * if word_vectors is None: # <<<<<<<<<<<<<< @@ -5398,22 +5390,22 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT __pyx_t_6 = (__pyx_t_5 != 0); if (__pyx_t_6) { - /* "gensim/models/doc2vec_inner.pyx":409 + /* "gensim/models/doc2vec_inner.pyx":492 * # default vectors, locks from syn0/doctag_syn0 * if word_vectors is None: * word_vectors = model.wv.vectors # <<<<<<<<<<<<<< * _word_vectors = (np.PyArray_DATA(word_vectors)) * if doctag_vectors is None: */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 409, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 492, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_vectors); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 409, __pyx_L1_error) + __pyx_t_1 = 
__Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_vectors); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 492, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_DECREF_SET(__pyx_v_word_vectors, __pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/doc2vec_inner.pyx":408 + /* "gensim/models/doc2vec_inner.pyx":491 * * # default vectors, locks from syn0/doctag_syn0 * if word_vectors is None: # <<<<<<<<<<<<<< @@ -5422,17 +5414,17 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ } - /* "gensim/models/doc2vec_inner.pyx":410 + /* "gensim/models/doc2vec_inner.pyx":493 * if word_vectors is None: * word_vectors = model.wv.vectors * _word_vectors = (np.PyArray_DATA(word_vectors)) # <<<<<<<<<<<<<< * if doctag_vectors is None: * doctag_vectors = model.docvecs.vectors_docs */ - if (!(likely(((__pyx_v_word_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_vectors, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 410, __pyx_L1_error) + if (!(likely(((__pyx_v_word_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_vectors, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 493, __pyx_L1_error) __pyx_v__word_vectors = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_word_vectors))); - /* "gensim/models/doc2vec_inner.pyx":411 + /* "gensim/models/doc2vec_inner.pyx":494 * word_vectors = model.wv.vectors * _word_vectors = (np.PyArray_DATA(word_vectors)) * if doctag_vectors is None: # <<<<<<<<<<<<<< @@ -5443,22 +5435,22 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT __pyx_t_5 = (__pyx_t_6 != 0); if (__pyx_t_5) { - /* "gensim/models/doc2vec_inner.pyx":412 + /* "gensim/models/doc2vec_inner.pyx":495 * _word_vectors = (np.PyArray_DATA(word_vectors)) * if doctag_vectors is None: * doctag_vectors = model.docvecs.vectors_docs # <<<<<<<<<<<<<< * _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) * if word_locks is None: */ - __pyx_t_1 = 
__Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_docvecs); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 412, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_docvecs); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 495, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_vectors_docs); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 412, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_vectors_docs); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 495, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF_SET(__pyx_v_doctag_vectors, __pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/doc2vec_inner.pyx":411 + /* "gensim/models/doc2vec_inner.pyx":494 * word_vectors = model.wv.vectors * _word_vectors = (np.PyArray_DATA(word_vectors)) * if doctag_vectors is None: # <<<<<<<<<<<<<< @@ -5467,17 +5459,17 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ } - /* "gensim/models/doc2vec_inner.pyx":413 + /* "gensim/models/doc2vec_inner.pyx":496 * if doctag_vectors is None: * doctag_vectors = model.docvecs.vectors_docs * _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) # <<<<<<<<<<<<<< * if word_locks is None: * word_locks = model.trainables.vectors_lockf */ - if (!(likely(((__pyx_v_doctag_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doctag_vectors, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 413, __pyx_L1_error) + if (!(likely(((__pyx_v_doctag_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doctag_vectors, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 496, __pyx_L1_error) __pyx_v__doctag_vectors = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_doctag_vectors))); - /* "gensim/models/doc2vec_inner.pyx":414 + /* "gensim/models/doc2vec_inner.pyx":497 * doctag_vectors = model.docvecs.vectors_docs * _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) * if word_locks is 
None: # <<<<<<<<<<<<<< @@ -5488,22 +5480,22 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT __pyx_t_6 = (__pyx_t_5 != 0); if (__pyx_t_6) { - /* "gensim/models/doc2vec_inner.pyx":415 + /* "gensim/models/doc2vec_inner.pyx":498 * _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) * if word_locks is None: * word_locks = model.trainables.vectors_lockf # <<<<<<<<<<<<<< * _word_locks = (np.PyArray_DATA(word_locks)) * if doctag_locks is None: */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 415, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 498, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_vectors_lockf); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 415, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_vectors_lockf); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 498, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_DECREF_SET(__pyx_v_word_locks, __pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/doc2vec_inner.pyx":414 + /* "gensim/models/doc2vec_inner.pyx":497 * doctag_vectors = model.docvecs.vectors_docs * _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) * if word_locks is None: # <<<<<<<<<<<<<< @@ -5512,17 +5504,17 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ } - /* "gensim/models/doc2vec_inner.pyx":416 + /* "gensim/models/doc2vec_inner.pyx":499 * if word_locks is None: * word_locks = model.trainables.vectors_lockf * _word_locks = (np.PyArray_DATA(word_locks)) # <<<<<<<<<<<<<< * if doctag_locks is None: * doctag_locks = model.trainables.vectors_docs_lockf */ - if (!(likely(((__pyx_v_word_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_locks, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 416, __pyx_L1_error) + if 
(!(likely(((__pyx_v_word_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_locks, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 499, __pyx_L1_error) __pyx_v__word_locks = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_word_locks))); - /* "gensim/models/doc2vec_inner.pyx":417 + /* "gensim/models/doc2vec_inner.pyx":500 * word_locks = model.trainables.vectors_lockf * _word_locks = (np.PyArray_DATA(word_locks)) * if doctag_locks is None: # <<<<<<<<<<<<<< @@ -5533,22 +5525,22 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT __pyx_t_5 = (__pyx_t_6 != 0); if (__pyx_t_5) { - /* "gensim/models/doc2vec_inner.pyx":418 + /* "gensim/models/doc2vec_inner.pyx":501 * _word_locks = (np.PyArray_DATA(word_locks)) * if doctag_locks is None: * doctag_locks = model.trainables.vectors_docs_lockf # <<<<<<<<<<<<<< * _doctag_locks = (np.PyArray_DATA(doctag_locks)) * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 418, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 501, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_vectors_docs_lockf); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 418, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_vectors_docs_lockf); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 501, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF_SET(__pyx_v_doctag_locks, __pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/doc2vec_inner.pyx":417 + /* "gensim/models/doc2vec_inner.pyx":500 * word_locks = model.trainables.vectors_lockf * _word_locks = (np.PyArray_DATA(word_locks)) * if doctag_locks is None: # <<<<<<<<<<<<<< @@ -5557,17 +5549,17 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ } - /* 
"gensim/models/doc2vec_inner.pyx":419 + /* "gensim/models/doc2vec_inner.pyx":502 * if doctag_locks is None: * doctag_locks = model.trainables.vectors_docs_lockf * _doctag_locks = (np.PyArray_DATA(doctag_locks)) # <<<<<<<<<<<<<< * * if hs: */ - if (!(likely(((__pyx_v_doctag_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doctag_locks, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 419, __pyx_L1_error) + if (!(likely(((__pyx_v_doctag_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doctag_locks, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 502, __pyx_L1_error) __pyx_v__doctag_locks = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_doctag_locks))); - /* "gensim/models/doc2vec_inner.pyx":421 + /* "gensim/models/doc2vec_inner.pyx":504 * _doctag_locks = (np.PyArray_DATA(doctag_locks)) * * if hs: # <<<<<<<<<<<<<< @@ -5577,23 +5569,23 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT __pyx_t_5 = (__pyx_v_hs != 0); if (__pyx_t_5) { - /* "gensim/models/doc2vec_inner.pyx":422 + /* "gensim/models/doc2vec_inner.pyx":505 * * if hs: * syn1 = (np.PyArray_DATA(model.trainables.syn1)) # <<<<<<<<<<<<<< * * if negative: */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 422, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 505, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 422, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 505, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 422, __pyx_L1_error) + if (!(likely(((__pyx_t_1) == 
Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 505, __pyx_L1_error) __pyx_v_syn1 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/doc2vec_inner.pyx":421 + /* "gensim/models/doc2vec_inner.pyx":504 * _doctag_locks = (np.PyArray_DATA(doctag_locks)) * * if hs: # <<<<<<<<<<<<<< @@ -5602,7 +5594,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ } - /* "gensim/models/doc2vec_inner.pyx":424 + /* "gensim/models/doc2vec_inner.pyx":507 * syn1 = (np.PyArray_DATA(model.trainables.syn1)) * * if negative: # <<<<<<<<<<<<<< @@ -5612,55 +5604,55 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT __pyx_t_5 = (__pyx_v_negative != 0); if (__pyx_t_5) { - /* "gensim/models/doc2vec_inner.pyx":425 + /* "gensim/models/doc2vec_inner.pyx":508 * * if negative: * syn1neg = (np.PyArray_DATA(model.trainables.syn1neg)) # <<<<<<<<<<<<<< * cum_table = (np.PyArray_DATA(model.vocabulary.cum_table)) * cum_table_len = len(model.vocabulary.cum_table) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 425, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 508, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 425, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 508, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 425, __pyx_L1_error) + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, 
__pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 508, __pyx_L1_error) __pyx_v_syn1neg = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/doc2vec_inner.pyx":426 + /* "gensim/models/doc2vec_inner.pyx":509 * if negative: * syn1neg = (np.PyArray_DATA(model.trainables.syn1neg)) * cum_table = (np.PyArray_DATA(model.vocabulary.cum_table)) # <<<<<<<<<<<<<< * cum_table_len = len(model.vocabulary.cum_table) * if negative or sample: */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocabulary); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 426, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocabulary); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 509, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 426, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 509, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 426, __pyx_L1_error) + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 509, __pyx_L1_error) __pyx_v_cum_table = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/doc2vec_inner.pyx":427 + /* "gensim/models/doc2vec_inner.pyx":510 * syn1neg = (np.PyArray_DATA(model.trainables.syn1neg)) * cum_table = (np.PyArray_DATA(model.vocabulary.cum_table)) * cum_table_len = len(model.vocabulary.cum_table) # <<<<<<<<<<<<<< * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) */ - __pyx_t_1 = 
__Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocabulary); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 427, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocabulary); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 510, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 427, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 510, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_7 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_7 == ((Py_ssize_t)-1))) __PYX_ERR(0, 427, __pyx_L1_error) + __pyx_t_7 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_7 == ((Py_ssize_t)-1))) __PYX_ERR(0, 510, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_cum_table_len = __pyx_t_7; - /* "gensim/models/doc2vec_inner.pyx":424 + /* "gensim/models/doc2vec_inner.pyx":507 * syn1 = (np.PyArray_DATA(model.trainables.syn1)) * * if negative: # <<<<<<<<<<<<<< @@ -5669,7 +5661,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ } - /* "gensim/models/doc2vec_inner.pyx":428 + /* "gensim/models/doc2vec_inner.pyx":511 * cum_table = (np.PyArray_DATA(model.vocabulary.cum_table)) * cum_table_len = len(model.vocabulary.cum_table) * if negative or sample: # <<<<<<<<<<<<<< @@ -5687,41 +5679,41 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT __pyx_L10_bool_binop_done:; if (__pyx_t_5) { - /* "gensim/models/doc2vec_inner.pyx":429 + /* "gensim/models/doc2vec_inner.pyx":512 * cum_table_len = len(model.vocabulary.cum_table) * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_t_3 = 
__Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 429, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 512, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 429, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 512, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple__3, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 429, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple__3, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 512, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 429, __pyx_L1_error) + __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 512, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 429, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 512, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 429, __pyx_L1_error) + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 512, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_tuple__4, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 429, __pyx_L1_error) + __pyx_t_3 
= __Pyx_PyObject_Call(__pyx_t_8, __pyx_tuple__4, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 512, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __pyx_t_8 = PyNumber_Add(__pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 429, __pyx_L1_error) + __pyx_t_8 = PyNumber_Add(__pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 512, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_9 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_8); if (unlikely((__pyx_t_9 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) __PYX_ERR(0, 429, __pyx_L1_error) + __pyx_t_9 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_8); if (unlikely((__pyx_t_9 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) __PYX_ERR(0, 512, __pyx_L1_error) __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; __pyx_v_next_random = __pyx_t_9; - /* "gensim/models/doc2vec_inner.pyx":428 + /* "gensim/models/doc2vec_inner.pyx":511 * cum_table = (np.PyArray_DATA(model.vocabulary.cum_table)) * cum_table_len = len(model.vocabulary.cum_table) * if negative or sample: # <<<<<<<<<<<<<< @@ -5730,7 +5722,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ } - /* "gensim/models/doc2vec_inner.pyx":432 + /* "gensim/models/doc2vec_inner.pyx":515 * * # convert Python structures to primitive types, so we can release the GIL * if work is None: # <<<<<<<<<<<<<< @@ -5741,32 +5733,32 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT __pyx_t_6 = (__pyx_t_5 != 0); if (__pyx_t_6) { - /* "gensim/models/doc2vec_inner.pyx":433 + /* "gensim/models/doc2vec_inner.pyx":516 * # convert Python structures to primitive types, so we can release the GIL * if work is None: * work = zeros(model.trainables.layer1_size, dtype=REAL) # <<<<<<<<<<<<<< * _work = np.PyArray_DATA(work) * if neu1 is None: */ - __pyx_t_8 = __Pyx_GetModuleGlobalName(__pyx_n_s_zeros); if 
(unlikely(!__pyx_t_8)) __PYX_ERR(0, 433, __pyx_L1_error) + __pyx_t_8 = __Pyx_GetModuleGlobalName(__pyx_n_s_zeros); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 516, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 433, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 516, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 433, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 516, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 433, __pyx_L1_error) + __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 516, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_GIVEREF(__pyx_t_1); PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyDict_NewPresized(1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 433, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyDict_NewPresized(1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 516, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_10 = __Pyx_GetModuleGlobalName(__pyx_n_s_REAL); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 433, __pyx_L1_error) + __pyx_t_10 = __Pyx_GetModuleGlobalName(__pyx_n_s_REAL); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 516, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_10); - if (PyDict_SetItem(__pyx_t_1, __pyx_n_s_dtype, __pyx_t_10) < 0) __PYX_ERR(0, 433, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_s_dtype, __pyx_t_10) < 0) __PYX_ERR(0, 516, __pyx_L1_error) __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - __pyx_t_10 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_t_3, __pyx_t_1); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 433, __pyx_L1_error) + 
__pyx_t_10 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_t_3, __pyx_t_1); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 516, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_10); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; @@ -5774,7 +5766,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT __Pyx_DECREF_SET(__pyx_v_work, __pyx_t_10); __pyx_t_10 = 0; - /* "gensim/models/doc2vec_inner.pyx":432 + /* "gensim/models/doc2vec_inner.pyx":515 * * # convert Python structures to primitive types, so we can release the GIL * if work is None: # <<<<<<<<<<<<<< @@ -5783,17 +5775,17 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ } - /* "gensim/models/doc2vec_inner.pyx":434 + /* "gensim/models/doc2vec_inner.pyx":517 * if work is None: * work = zeros(model.trainables.layer1_size, dtype=REAL) * _work = np.PyArray_DATA(work) # <<<<<<<<<<<<<< * if neu1 is None: * neu1 = zeros(model.trainables.layer1_size, dtype=REAL) */ - if (!(likely(((__pyx_v_work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_work, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 434, __pyx_L1_error) + if (!(likely(((__pyx_v_work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_work, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 517, __pyx_L1_error) __pyx_v__work = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_work))); - /* "gensim/models/doc2vec_inner.pyx":435 + /* "gensim/models/doc2vec_inner.pyx":518 * work = zeros(model.trainables.layer1_size, dtype=REAL) * _work = np.PyArray_DATA(work) * if neu1 is None: # <<<<<<<<<<<<<< @@ -5804,32 +5796,32 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT __pyx_t_5 = (__pyx_t_6 != 0); if (__pyx_t_5) { - /* "gensim/models/doc2vec_inner.pyx":436 + /* "gensim/models/doc2vec_inner.pyx":519 * _work = np.PyArray_DATA(work) * if neu1 is None: * neu1 = zeros(model.trainables.layer1_size, dtype=REAL) # <<<<<<<<<<<<<< * _neu1 = 
np.PyArray_DATA(neu1) * */ - __pyx_t_10 = __Pyx_GetModuleGlobalName(__pyx_n_s_zeros); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 436, __pyx_L1_error) + __pyx_t_10 = __Pyx_GetModuleGlobalName(__pyx_n_s_zeros); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 519, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_10); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 436, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 519, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 436, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 519, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 436, __pyx_L1_error) + __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 519, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_GIVEREF(__pyx_t_3); PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_PyDict_NewPresized(1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 436, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyDict_NewPresized(1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 519, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_8 = __Pyx_GetModuleGlobalName(__pyx_n_s_REAL); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 436, __pyx_L1_error) + __pyx_t_8 = __Pyx_GetModuleGlobalName(__pyx_n_s_REAL); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 519, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); - if (PyDict_SetItem(__pyx_t_3, __pyx_n_s_dtype, __pyx_t_8) < 0) __PYX_ERR(0, 436, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_3, __pyx_n_s_dtype, __pyx_t_8) < 0) __PYX_ERR(0, 519, __pyx_L1_error) __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __pyx_t_8 = __Pyx_PyObject_Call(__pyx_t_10, 
__pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 436, __pyx_L1_error) + __pyx_t_8 = __Pyx_PyObject_Call(__pyx_t_10, __pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 519, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; @@ -5837,7 +5829,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT __Pyx_DECREF_SET(__pyx_v_neu1, __pyx_t_8); __pyx_t_8 = 0; - /* "gensim/models/doc2vec_inner.pyx":435 + /* "gensim/models/doc2vec_inner.pyx":518 * work = zeros(model.trainables.layer1_size, dtype=REAL) * _work = np.PyArray_DATA(work) * if neu1 is None: # <<<<<<<<<<<<<< @@ -5846,32 +5838,32 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ } - /* "gensim/models/doc2vec_inner.pyx":437 + /* "gensim/models/doc2vec_inner.pyx":520 * if neu1 is None: * neu1 = zeros(model.trainables.layer1_size, dtype=REAL) * _neu1 = np.PyArray_DATA(neu1) # <<<<<<<<<<<<<< * * vlookup = model.wv.vocab */ - if (!(likely(((__pyx_v_neu1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_neu1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 437, __pyx_L1_error) + if (!(likely(((__pyx_v_neu1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_neu1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 520, __pyx_L1_error) __pyx_v__neu1 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_neu1))); - /* "gensim/models/doc2vec_inner.pyx":439 + /* "gensim/models/doc2vec_inner.pyx":522 * _neu1 = np.PyArray_DATA(neu1) * * vlookup = model.wv.vocab # <<<<<<<<<<<<<< * i = 0 * for token in doc_words: */ - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 439, __pyx_L1_error) + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 522, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, 
__pyx_n_s_vocab); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 439, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_vocab); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 522, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; __pyx_v_vlookup = __pyx_t_3; __pyx_t_3 = 0; - /* "gensim/models/doc2vec_inner.pyx":440 + /* "gensim/models/doc2vec_inner.pyx":523 * * vlookup = model.wv.vocab * i = 0 # <<<<<<<<<<<<<< @@ -5880,7 +5872,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ __pyx_v_i = 0; - /* "gensim/models/doc2vec_inner.pyx":441 + /* "gensim/models/doc2vec_inner.pyx":524 * vlookup = model.wv.vocab * i = 0 * for token in doc_words: # <<<<<<<<<<<<<< @@ -5891,26 +5883,26 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT __pyx_t_3 = __pyx_v_doc_words; __Pyx_INCREF(__pyx_t_3); __pyx_t_7 = 0; __pyx_t_11 = NULL; } else { - __pyx_t_7 = -1; __pyx_t_3 = PyObject_GetIter(__pyx_v_doc_words); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 441, __pyx_L1_error) + __pyx_t_7 = -1; __pyx_t_3 = PyObject_GetIter(__pyx_v_doc_words); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 524, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_11 = Py_TYPE(__pyx_t_3)->tp_iternext; if (unlikely(!__pyx_t_11)) __PYX_ERR(0, 441, __pyx_L1_error) + __pyx_t_11 = Py_TYPE(__pyx_t_3)->tp_iternext; if (unlikely(!__pyx_t_11)) __PYX_ERR(0, 524, __pyx_L1_error) } for (;;) { if (likely(!__pyx_t_11)) { if (likely(PyList_CheckExact(__pyx_t_3))) { if (__pyx_t_7 >= PyList_GET_SIZE(__pyx_t_3)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_8 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_7); __Pyx_INCREF(__pyx_t_8); __pyx_t_7++; if (unlikely(0 < 0)) __PYX_ERR(0, 441, __pyx_L1_error) + __pyx_t_8 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_7); __Pyx_INCREF(__pyx_t_8); __pyx_t_7++; if (unlikely(0 < 0)) __PYX_ERR(0, 524, __pyx_L1_error) #else - __pyx_t_8 = PySequence_ITEM(__pyx_t_3, __pyx_t_7); 
__pyx_t_7++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 441, __pyx_L1_error) + __pyx_t_8 = PySequence_ITEM(__pyx_t_3, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 524, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); #endif } else { if (__pyx_t_7 >= PyTuple_GET_SIZE(__pyx_t_3)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_8 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_7); __Pyx_INCREF(__pyx_t_8); __pyx_t_7++; if (unlikely(0 < 0)) __PYX_ERR(0, 441, __pyx_L1_error) + __pyx_t_8 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_7); __Pyx_INCREF(__pyx_t_8); __pyx_t_7++; if (unlikely(0 < 0)) __PYX_ERR(0, 524, __pyx_L1_error) #else - __pyx_t_8 = PySequence_ITEM(__pyx_t_3, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 441, __pyx_L1_error) + __pyx_t_8 = PySequence_ITEM(__pyx_t_3, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 524, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); #endif } @@ -5920,7 +5912,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 441, __pyx_L1_error) + else __PYX_ERR(0, 524, __pyx_L1_error) } break; } @@ -5929,16 +5921,16 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT __Pyx_XDECREF_SET(__pyx_v_token, __pyx_t_8); __pyx_t_8 = 0; - /* "gensim/models/doc2vec_inner.pyx":442 + /* "gensim/models/doc2vec_inner.pyx":525 * i = 0 * for token in doc_words: * predict_word = vlookup[token] if token in vlookup else None # <<<<<<<<<<<<<< * if predict_word is None: # shrink document to leave out word * continue # leaving i unchanged */ - __pyx_t_5 = (__Pyx_PySequence_ContainsTF(__pyx_v_token, __pyx_v_vlookup, Py_EQ)); if (unlikely(__pyx_t_5 < 0)) __PYX_ERR(0, 442, __pyx_L1_error) + __pyx_t_5 = (__Pyx_PySequence_ContainsTF(__pyx_v_token, __pyx_v_vlookup, Py_EQ)); if 
(unlikely(__pyx_t_5 < 0)) __PYX_ERR(0, 525, __pyx_L1_error) if ((__pyx_t_5 != 0)) { - __pyx_t_1 = PyObject_GetItem(__pyx_v_vlookup, __pyx_v_token); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 442, __pyx_L1_error) + __pyx_t_1 = PyObject_GetItem(__pyx_v_vlookup, __pyx_v_token); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 525, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_t_8 = __pyx_t_1; __pyx_t_1 = 0; @@ -5949,7 +5941,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT __Pyx_XDECREF_SET(__pyx_v_predict_word, __pyx_t_8); __pyx_t_8 = 0; - /* "gensim/models/doc2vec_inner.pyx":443 + /* "gensim/models/doc2vec_inner.pyx":526 * for token in doc_words: * predict_word = vlookup[token] if token in vlookup else None * if predict_word is None: # shrink document to leave out word # <<<<<<<<<<<<<< @@ -5960,7 +5952,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT __pyx_t_6 = (__pyx_t_5 != 0); if (__pyx_t_6) { - /* "gensim/models/doc2vec_inner.pyx":444 + /* "gensim/models/doc2vec_inner.pyx":527 * predict_word = vlookup[token] if token in vlookup else None * if predict_word is None: # shrink document to leave out word * continue # leaving i unchanged # <<<<<<<<<<<<<< @@ -5969,7 +5961,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ goto __pyx_L14_continue; - /* "gensim/models/doc2vec_inner.pyx":443 + /* "gensim/models/doc2vec_inner.pyx":526 * for token in doc_words: * predict_word = vlookup[token] if token in vlookup else None * if predict_word is None: # shrink document to leave out word # <<<<<<<<<<<<<< @@ -5978,7 +5970,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ } - /* "gensim/models/doc2vec_inner.pyx":445 + /* "gensim/models/doc2vec_inner.pyx":528 * if predict_word is None: # shrink document to leave out word * continue # leaving i unchanged * if sample and predict_word.sample_int < random_int32(&next_random): # <<<<<<<<<<<<<< @@ 
-5991,20 +5983,20 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT __pyx_t_6 = __pyx_t_5; goto __pyx_L18_bool_binop_done; } - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_sample_int); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 445, __pyx_L1_error) + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_sample_int); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 528, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); - __pyx_t_1 = __Pyx_PyInt_From_unsigned_PY_LONG_LONG(__pyx_f_6gensim_6models_14word2vec_inner_random_int32((&__pyx_v_next_random))); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 445, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyInt_From_unsigned_PY_LONG_LONG(__pyx_f_6gensim_6models_14word2vec_inner_random_int32((&__pyx_v_next_random))); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 528, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_10 = PyObject_RichCompare(__pyx_t_8, __pyx_t_1, Py_LT); __Pyx_XGOTREF(__pyx_t_10); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 445, __pyx_L1_error) + __pyx_t_10 = PyObject_RichCompare(__pyx_t_8, __pyx_t_1, Py_LT); __Pyx_XGOTREF(__pyx_t_10); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 528, __pyx_L1_error) __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_t_10); if (unlikely(__pyx_t_5 < 0)) __PYX_ERR(0, 445, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_t_10); if (unlikely(__pyx_t_5 < 0)) __PYX_ERR(0, 528, __pyx_L1_error) __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; __pyx_t_6 = __pyx_t_5; __pyx_L18_bool_binop_done:; if (__pyx_t_6) { - /* "gensim/models/doc2vec_inner.pyx":446 + /* "gensim/models/doc2vec_inner.pyx":529 * continue # leaving i unchanged * if sample and predict_word.sample_int < random_int32(&next_random): * continue # <<<<<<<<<<<<<< @@ -6013,7 +6005,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ goto __pyx_L14_continue; - /* "gensim/models/doc2vec_inner.pyx":445 + 
/* "gensim/models/doc2vec_inner.pyx":528 * if predict_word is None: # shrink document to leave out word * continue # leaving i unchanged * if sample and predict_word.sample_int < random_int32(&next_random): # <<<<<<<<<<<<<< @@ -6022,20 +6014,20 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ } - /* "gensim/models/doc2vec_inner.pyx":447 + /* "gensim/models/doc2vec_inner.pyx":530 * if sample and predict_word.sample_int < random_int32(&next_random): * continue * indexes[i] = predict_word.index # <<<<<<<<<<<<<< * if hs: * codelens[i] = len(predict_word.code) */ - __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_index); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 447, __pyx_L1_error) + __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_index); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 530, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_10); - __pyx_t_12 = __Pyx_PyInt_As_npy_uint32(__pyx_t_10); if (unlikely((__pyx_t_12 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 447, __pyx_L1_error) + __pyx_t_12 = __Pyx_PyInt_As_npy_uint32(__pyx_t_10); if (unlikely((__pyx_t_12 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 530, __pyx_L1_error) __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; (__pyx_v_indexes[__pyx_v_i]) = __pyx_t_12; - /* "gensim/models/doc2vec_inner.pyx":448 + /* "gensim/models/doc2vec_inner.pyx":531 * continue * indexes[i] = predict_word.index * if hs: # <<<<<<<<<<<<<< @@ -6045,46 +6037,46 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT __pyx_t_6 = (__pyx_v_hs != 0); if (__pyx_t_6) { - /* "gensim/models/doc2vec_inner.pyx":449 + /* "gensim/models/doc2vec_inner.pyx":532 * indexes[i] = predict_word.index * if hs: * codelens[i] = len(predict_word.code) # <<<<<<<<<<<<<< * codes[i] = np.PyArray_DATA(predict_word.code) * points[i] = np.PyArray_DATA(predict_word.point) */ - __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_code); if 
(unlikely(!__pyx_t_10)) __PYX_ERR(0, 449, __pyx_L1_error) + __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_code); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 532, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_10); - __pyx_t_13 = PyObject_Length(__pyx_t_10); if (unlikely(__pyx_t_13 == ((Py_ssize_t)-1))) __PYX_ERR(0, 449, __pyx_L1_error) + __pyx_t_13 = PyObject_Length(__pyx_t_10); if (unlikely(__pyx_t_13 == ((Py_ssize_t)-1))) __PYX_ERR(0, 532, __pyx_L1_error) __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; (__pyx_v_codelens[__pyx_v_i]) = ((int)__pyx_t_13); - /* "gensim/models/doc2vec_inner.pyx":450 + /* "gensim/models/doc2vec_inner.pyx":533 * if hs: * codelens[i] = len(predict_word.code) * codes[i] = np.PyArray_DATA(predict_word.code) # <<<<<<<<<<<<<< * points[i] = np.PyArray_DATA(predict_word.point) * result += 1 */ - __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_code); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 450, __pyx_L1_error) + __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_code); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 533, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_10); - if (!(likely(((__pyx_t_10) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_10, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 450, __pyx_L1_error) + if (!(likely(((__pyx_t_10) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_10, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 533, __pyx_L1_error) (__pyx_v_codes[__pyx_v_i]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_10))); __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - /* "gensim/models/doc2vec_inner.pyx":451 + /* "gensim/models/doc2vec_inner.pyx":534 * codelens[i] = len(predict_word.code) * codes[i] = np.PyArray_DATA(predict_word.code) * points[i] = np.PyArray_DATA(predict_word.point) # <<<<<<<<<<<<<< * result += 1 * i += 1 */ - __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_point); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 451, __pyx_L1_error) + __pyx_t_10 = 
__Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_point); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 534, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_10); - if (!(likely(((__pyx_t_10) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_10, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 451, __pyx_L1_error) + if (!(likely(((__pyx_t_10) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_10, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 534, __pyx_L1_error) (__pyx_v_points[__pyx_v_i]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_10))); __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - /* "gensim/models/doc2vec_inner.pyx":448 + /* "gensim/models/doc2vec_inner.pyx":531 * continue * indexes[i] = predict_word.index * if hs: # <<<<<<<<<<<<<< @@ -6093,7 +6085,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ } - /* "gensim/models/doc2vec_inner.pyx":452 + /* "gensim/models/doc2vec_inner.pyx":535 * codes[i] = np.PyArray_DATA(predict_word.code) * points[i] = np.PyArray_DATA(predict_word.point) * result += 1 # <<<<<<<<<<<<<< @@ -6102,7 +6094,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ __pyx_v_result = (__pyx_v_result + 1); - /* "gensim/models/doc2vec_inner.pyx":453 + /* "gensim/models/doc2vec_inner.pyx":536 * points[i] = np.PyArray_DATA(predict_word.point) * result += 1 * i += 1 # <<<<<<<<<<<<<< @@ -6111,7 +6103,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ __pyx_v_i = (__pyx_v_i + 1); - /* "gensim/models/doc2vec_inner.pyx":454 + /* "gensim/models/doc2vec_inner.pyx":537 * result += 1 * i += 1 * if i == MAX_DOCUMENT_LEN: # <<<<<<<<<<<<<< @@ -6121,7 +6113,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT __pyx_t_6 = ((__pyx_v_i == 0x2710) != 0); if (__pyx_t_6) { - /* "gensim/models/doc2vec_inner.pyx":455 + /* "gensim/models/doc2vec_inner.pyx":538 * i += 1 * if i == MAX_DOCUMENT_LEN: * break # TODO: log warning, tally 
overflow? # <<<<<<<<<<<<<< @@ -6130,7 +6122,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ goto __pyx_L15_break; - /* "gensim/models/doc2vec_inner.pyx":454 + /* "gensim/models/doc2vec_inner.pyx":537 * result += 1 * i += 1 * if i == MAX_DOCUMENT_LEN: # <<<<<<<<<<<<<< @@ -6139,7 +6131,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ } - /* "gensim/models/doc2vec_inner.pyx":441 + /* "gensim/models/doc2vec_inner.pyx":524 * vlookup = model.wv.vocab * i = 0 * for token in doc_words: # <<<<<<<<<<<<<< @@ -6151,7 +6143,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT __pyx_L15_break:; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/doc2vec_inner.pyx":456 + /* "gensim/models/doc2vec_inner.pyx":539 * if i == MAX_DOCUMENT_LEN: * break # TODO: log warning, tally overflow? * document_len = i # <<<<<<<<<<<<<< @@ -6160,7 +6152,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ __pyx_v_document_len = __pyx_v_i; - /* "gensim/models/doc2vec_inner.pyx":459 + /* "gensim/models/doc2vec_inner.pyx":542 * * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(model.random.randint(0, window, document_len)): # <<<<<<<<<<<<<< @@ -6168,14 +6160,14 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT * */ __pyx_t_2 = 0; - __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 459, __pyx_L1_error) + __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 542, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_10); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_10, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 459, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_10, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 542, 
__pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - __pyx_t_10 = __Pyx_PyInt_From_int(__pyx_v_window); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 459, __pyx_L1_error) + __pyx_t_10 = __Pyx_PyInt_From_int(__pyx_v_window); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 542, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_10); - __pyx_t_8 = __Pyx_PyInt_From_int(__pyx_v_document_len); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 459, __pyx_L1_error) + __pyx_t_8 = __Pyx_PyInt_From_int(__pyx_v_document_len); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 542, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); __pyx_t_14 = NULL; __pyx_t_15 = 0; @@ -6192,7 +6184,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT #if CYTHON_FAST_PYCALL if (PyFunction_Check(__pyx_t_1)) { PyObject *__pyx_temp[4] = {__pyx_t_14, __pyx_int_0, __pyx_t_10, __pyx_t_8}; - __pyx_t_3 = __Pyx_PyFunction_FastCall(__pyx_t_1, __pyx_temp+1-__pyx_t_15, 3+__pyx_t_15); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 459, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyFunction_FastCall(__pyx_t_1, __pyx_temp+1-__pyx_t_15, 3+__pyx_t_15); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 542, __pyx_L1_error) __Pyx_XDECREF(__pyx_t_14); __pyx_t_14 = 0; __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; @@ -6202,7 +6194,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT #if CYTHON_FAST_PYCCALL if (__Pyx_PyFastCFunction_Check(__pyx_t_1)) { PyObject *__pyx_temp[4] = {__pyx_t_14, __pyx_int_0, __pyx_t_10, __pyx_t_8}; - __pyx_t_3 = __Pyx_PyCFunction_FastCall(__pyx_t_1, __pyx_temp+1-__pyx_t_15, 3+__pyx_t_15); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 459, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyCFunction_FastCall(__pyx_t_1, __pyx_temp+1-__pyx_t_15, 3+__pyx_t_15); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 542, __pyx_L1_error) __Pyx_XDECREF(__pyx_t_14); __pyx_t_14 = 0; __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; @@ -6210,7 +6202,7 @@ static PyObject 
*__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT } else #endif { - __pyx_t_16 = PyTuple_New(3+__pyx_t_15); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 459, __pyx_L1_error) + __pyx_t_16 = PyTuple_New(3+__pyx_t_15); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 542, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); if (__pyx_t_14) { __Pyx_GIVEREF(__pyx_t_14); PyTuple_SET_ITEM(__pyx_t_16, 0, __pyx_t_14); __pyx_t_14 = NULL; @@ -6224,7 +6216,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT PyTuple_SET_ITEM(__pyx_t_16, 2+__pyx_t_15, __pyx_t_8); __pyx_t_10 = 0; __pyx_t_8 = 0; - __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_t_16, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 459, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_t_16, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 542, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; } @@ -6233,9 +6225,9 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT __pyx_t_1 = __pyx_t_3; __Pyx_INCREF(__pyx_t_1); __pyx_t_7 = 0; __pyx_t_11 = NULL; } else { - __pyx_t_7 = -1; __pyx_t_1 = PyObject_GetIter(__pyx_t_3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 459, __pyx_L1_error) + __pyx_t_7 = -1; __pyx_t_1 = PyObject_GetIter(__pyx_t_3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 542, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_11 = Py_TYPE(__pyx_t_1)->tp_iternext; if (unlikely(!__pyx_t_11)) __PYX_ERR(0, 459, __pyx_L1_error) + __pyx_t_11 = Py_TYPE(__pyx_t_1)->tp_iternext; if (unlikely(!__pyx_t_11)) __PYX_ERR(0, 542, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; for (;;) { @@ -6243,17 +6235,17 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT if (likely(PyList_CheckExact(__pyx_t_1))) { if (__pyx_t_7 >= PyList_GET_SIZE(__pyx_t_1)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_3 = PyList_GET_ITEM(__pyx_t_1, __pyx_t_7); __Pyx_INCREF(__pyx_t_3); 
__pyx_t_7++; if (unlikely(0 < 0)) __PYX_ERR(0, 459, __pyx_L1_error) + __pyx_t_3 = PyList_GET_ITEM(__pyx_t_1, __pyx_t_7); __Pyx_INCREF(__pyx_t_3); __pyx_t_7++; if (unlikely(0 < 0)) __PYX_ERR(0, 542, __pyx_L1_error) #else - __pyx_t_3 = PySequence_ITEM(__pyx_t_1, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 459, __pyx_L1_error) + __pyx_t_3 = PySequence_ITEM(__pyx_t_1, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 542, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); #endif } else { if (__pyx_t_7 >= PyTuple_GET_SIZE(__pyx_t_1)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_1, __pyx_t_7); __Pyx_INCREF(__pyx_t_3); __pyx_t_7++; if (unlikely(0 < 0)) __PYX_ERR(0, 459, __pyx_L1_error) + __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_1, __pyx_t_7); __Pyx_INCREF(__pyx_t_3); __pyx_t_7++; if (unlikely(0 < 0)) __PYX_ERR(0, 542, __pyx_L1_error) #else - __pyx_t_3 = PySequence_ITEM(__pyx_t_1, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 459, __pyx_L1_error) + __pyx_t_3 = PySequence_ITEM(__pyx_t_1, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 542, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); #endif } @@ -6263,7 +6255,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 459, __pyx_L1_error) + else __PYX_ERR(0, 542, __pyx_L1_error) } break; } @@ -6274,17 +6266,17 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT __pyx_v_i = __pyx_t_2; __pyx_t_2 = (__pyx_t_2 + 1); - /* "gensim/models/doc2vec_inner.pyx":460 + /* "gensim/models/doc2vec_inner.pyx":543 * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(model.random.randint(0, window, document_len)): * reduced_windows[i] = item # <<<<<<<<<<<<<< * * doctag_len = 
min(MAX_DOCUMENT_LEN, len(doctag_indexes)) */ - __pyx_t_12 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_12 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 460, __pyx_L1_error) + __pyx_t_12 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_12 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 543, __pyx_L1_error) (__pyx_v_reduced_windows[__pyx_v_i]) = __pyx_t_12; - /* "gensim/models/doc2vec_inner.pyx":459 + /* "gensim/models/doc2vec_inner.pyx":542 * * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(model.random.randint(0, window, document_len)): # <<<<<<<<<<<<<< @@ -6294,14 +6286,14 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/doc2vec_inner.pyx":462 + /* "gensim/models/doc2vec_inner.pyx":545 * reduced_windows[i] = item * * doctag_len = min(MAX_DOCUMENT_LEN, len(doctag_indexes)) # <<<<<<<<<<<<<< * for i in range(doctag_len): * _doctag_indexes[i] = doctag_indexes[i] */ - __pyx_t_7 = PyObject_Length(__pyx_v_doctag_indexes); if (unlikely(__pyx_t_7 == ((Py_ssize_t)-1))) __PYX_ERR(0, 462, __pyx_L1_error) + __pyx_t_7 = PyObject_Length(__pyx_v_doctag_indexes); if (unlikely(__pyx_t_7 == ((Py_ssize_t)-1))) __PYX_ERR(0, 545, __pyx_L1_error) __pyx_t_17 = 0x2710; if (((__pyx_t_7 < __pyx_t_17) != 0)) { __pyx_t_13 = __pyx_t_7; @@ -6310,7 +6302,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT } __pyx_v_doctag_len = ((int)__pyx_t_13); - /* "gensim/models/doc2vec_inner.pyx":463 + /* "gensim/models/doc2vec_inner.pyx":546 * * doctag_len = min(MAX_DOCUMENT_LEN, len(doctag_indexes)) * for i in range(doctag_len): # <<<<<<<<<<<<<< @@ -6321,20 +6313,20 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT for (__pyx_t_15 = 0; __pyx_t_15 < __pyx_t_2; __pyx_t_15+=1) { __pyx_v_i = __pyx_t_15; - /* "gensim/models/doc2vec_inner.pyx":464 + /* 
"gensim/models/doc2vec_inner.pyx":547 * doctag_len = min(MAX_DOCUMENT_LEN, len(doctag_indexes)) * for i in range(doctag_len): * _doctag_indexes[i] = doctag_indexes[i] # <<<<<<<<<<<<<< * result += 1 * */ - __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_doctag_indexes, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 464, __pyx_L1_error) + __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_doctag_indexes, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 547, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_12 = __Pyx_PyInt_As_npy_uint32(__pyx_t_1); if (unlikely((__pyx_t_12 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 464, __pyx_L1_error) + __pyx_t_12 = __Pyx_PyInt_As_npy_uint32(__pyx_t_1); if (unlikely((__pyx_t_12 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 547, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; (__pyx_v__doctag_indexes[__pyx_v_i]) = __pyx_t_12; - /* "gensim/models/doc2vec_inner.pyx":465 + /* "gensim/models/doc2vec_inner.pyx":548 * for i in range(doctag_len): * _doctag_indexes[i] = doctag_indexes[i] * result += 1 # <<<<<<<<<<<<<< @@ -6344,7 +6336,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT __pyx_v_result = (__pyx_v_result + 1); } - /* "gensim/models/doc2vec_inner.pyx":468 + /* "gensim/models/doc2vec_inner.pyx":551 * * # release GIL & train on the document * with nogil: # <<<<<<<<<<<<<< @@ -6359,7 +6351,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT #endif /*try:*/ { - /* "gensim/models/doc2vec_inner.pyx":469 + /* "gensim/models/doc2vec_inner.pyx":552 * # release GIL & train on the document * with nogil: * for i in range(document_len): # <<<<<<<<<<<<<< @@ -6370,7 +6362,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT for (__pyx_t_15 = 0; __pyx_t_15 < __pyx_t_2; __pyx_t_15+=1) { __pyx_v_i = __pyx_t_15; - /* "gensim/models/doc2vec_inner.pyx":470 + /* 
"gensim/models/doc2vec_inner.pyx":553 * with nogil: * for i in range(document_len): * j = i - window + reduced_windows[i] # <<<<<<<<<<<<<< @@ -6379,7 +6371,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ __pyx_v_j = ((__pyx_v_i - __pyx_v_window) + (__pyx_v_reduced_windows[__pyx_v_i])); - /* "gensim/models/doc2vec_inner.pyx":471 + /* "gensim/models/doc2vec_inner.pyx":554 * for i in range(document_len): * j = i - window + reduced_windows[i] * if j < 0: # <<<<<<<<<<<<<< @@ -6389,7 +6381,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT __pyx_t_6 = ((__pyx_v_j < 0) != 0); if (__pyx_t_6) { - /* "gensim/models/doc2vec_inner.pyx":472 + /* "gensim/models/doc2vec_inner.pyx":555 * j = i - window + reduced_windows[i] * if j < 0: * j = 0 # <<<<<<<<<<<<<< @@ -6398,7 +6390,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ __pyx_v_j = 0; - /* "gensim/models/doc2vec_inner.pyx":471 + /* "gensim/models/doc2vec_inner.pyx":554 * for i in range(document_len): * j = i - window + reduced_windows[i] * if j < 0: # <<<<<<<<<<<<<< @@ -6407,7 +6399,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ } - /* "gensim/models/doc2vec_inner.pyx":473 + /* "gensim/models/doc2vec_inner.pyx":556 * if j < 0: * j = 0 * k = i + window + 1 - reduced_windows[i] # <<<<<<<<<<<<<< @@ -6416,7 +6408,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ __pyx_v_k = (((__pyx_v_i + __pyx_v_window) + 1) - (__pyx_v_reduced_windows[__pyx_v_i])); - /* "gensim/models/doc2vec_inner.pyx":474 + /* "gensim/models/doc2vec_inner.pyx":557 * j = 0 * k = i + window + 1 - reduced_windows[i] * if k > document_len: # <<<<<<<<<<<<<< @@ -6426,7 +6418,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT __pyx_t_6 = ((__pyx_v_k > __pyx_v_document_len) != 0); if (__pyx_t_6) { - /* "gensim/models/doc2vec_inner.pyx":475 + 
/* "gensim/models/doc2vec_inner.pyx":558 * k = i + window + 1 - reduced_windows[i] * if k > document_len: * k = document_len # <<<<<<<<<<<<<< @@ -6435,7 +6427,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ __pyx_v_k = __pyx_v_document_len; - /* "gensim/models/doc2vec_inner.pyx":474 + /* "gensim/models/doc2vec_inner.pyx":557 * j = 0 * k = i + window + 1 - reduced_windows[i] * if k > document_len: # <<<<<<<<<<<<<< @@ -6444,7 +6436,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ } - /* "gensim/models/doc2vec_inner.pyx":478 + /* "gensim/models/doc2vec_inner.pyx":561 * * # compose l1 (in _neu1) & clear _work * memset(_neu1, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< @@ -6453,7 +6445,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ memset(__pyx_v__neu1, 0, (__pyx_v_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); - /* "gensim/models/doc2vec_inner.pyx":479 + /* "gensim/models/doc2vec_inner.pyx":562 * # compose l1 (in _neu1) & clear _work * memset(_neu1, 0, size * cython.sizeof(REAL_t)) * count = 0.0 # <<<<<<<<<<<<<< @@ -6462,7 +6454,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ __pyx_v_count = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)0.0); - /* "gensim/models/doc2vec_inner.pyx":480 + /* "gensim/models/doc2vec_inner.pyx":563 * memset(_neu1, 0, size * cython.sizeof(REAL_t)) * count = 0.0 * for m in range(j, k): # <<<<<<<<<<<<<< @@ -6473,7 +6465,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT for (__pyx_t_19 = __pyx_v_j; __pyx_t_19 < __pyx_t_18; __pyx_t_19+=1) { __pyx_v_m = __pyx_t_19; - /* "gensim/models/doc2vec_inner.pyx":481 + /* "gensim/models/doc2vec_inner.pyx":564 * count = 0.0 * for m in range(j, k): * if m == i: # <<<<<<<<<<<<<< @@ -6483,7 +6475,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT 
__pyx_t_6 = ((__pyx_v_m == __pyx_v_i) != 0); if (__pyx_t_6) { - /* "gensim/models/doc2vec_inner.pyx":482 + /* "gensim/models/doc2vec_inner.pyx":565 * for m in range(j, k): * if m == i: * continue # <<<<<<<<<<<<<< @@ -6492,7 +6484,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ goto __pyx_L33_continue; - /* "gensim/models/doc2vec_inner.pyx":481 + /* "gensim/models/doc2vec_inner.pyx":564 * count = 0.0 * for m in range(j, k): * if m == i: # <<<<<<<<<<<<<< @@ -6501,7 +6493,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ } - /* "gensim/models/doc2vec_inner.pyx":484 + /* "gensim/models/doc2vec_inner.pyx":567 * continue * else: * count += ONEF # <<<<<<<<<<<<<< @@ -6511,7 +6503,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT /*else*/ { __pyx_v_count = (__pyx_v_count + __pyx_v_6gensim_6models_13doc2vec_inner_ONEF); - /* "gensim/models/doc2vec_inner.pyx":485 + /* "gensim/models/doc2vec_inner.pyx":568 * else: * count += ONEF * our_saxpy(&size, &ONEF, &_word_vectors[indexes[m] * size], &ONE, _neu1, &ONE) # <<<<<<<<<<<<<< @@ -6523,7 +6515,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT __pyx_L33_continue:; } - /* "gensim/models/doc2vec_inner.pyx":486 + /* "gensim/models/doc2vec_inner.pyx":569 * count += ONEF * our_saxpy(&size, &ONEF, &_word_vectors[indexes[m] * size], &ONE, _neu1, &ONE) * for m in range(doctag_len): # <<<<<<<<<<<<<< @@ -6534,7 +6526,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT for (__pyx_t_19 = 0; __pyx_t_19 < __pyx_t_18; __pyx_t_19+=1) { __pyx_v_m = __pyx_t_19; - /* "gensim/models/doc2vec_inner.pyx":487 + /* "gensim/models/doc2vec_inner.pyx":570 * our_saxpy(&size, &ONEF, &_word_vectors[indexes[m] * size], &ONE, _neu1, &ONE) * for m in range(doctag_len): * count += ONEF # <<<<<<<<<<<<<< @@ -6543,7 +6535,7 @@ static PyObject 
*__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ __pyx_v_count = (__pyx_v_count + __pyx_v_6gensim_6models_13doc2vec_inner_ONEF); - /* "gensim/models/doc2vec_inner.pyx":488 + /* "gensim/models/doc2vec_inner.pyx":571 * for m in range(doctag_len): * count += ONEF * our_saxpy(&size, &ONEF, &_doctag_vectors[_doctag_indexes[m] * size], &ONE, _neu1, &ONE) # <<<<<<<<<<<<<< @@ -6553,7 +6545,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_6gensim_6models_13doc2vec_inner_ONEF), (&(__pyx_v__doctag_vectors[((__pyx_v__doctag_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE), __pyx_v__neu1, (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE)); } - /* "gensim/models/doc2vec_inner.pyx":489 + /* "gensim/models/doc2vec_inner.pyx":572 * count += ONEF * our_saxpy(&size, &ONEF, &_doctag_vectors[_doctag_indexes[m] * size], &ONE, _neu1, &ONE) * if count > (0.5): # <<<<<<<<<<<<<< @@ -6563,7 +6555,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT __pyx_t_6 = ((__pyx_v_count > ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)0.5)) != 0); if (__pyx_t_6) { - /* "gensim/models/doc2vec_inner.pyx":490 + /* "gensim/models/doc2vec_inner.pyx":573 * our_saxpy(&size, &ONEF, &_doctag_vectors[_doctag_indexes[m] * size], &ONE, _neu1, &ONE) * if count > (0.5): * inv_count = ONEF/count # <<<<<<<<<<<<<< @@ -6572,7 +6564,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ __pyx_v_inv_count = (__pyx_v_6gensim_6models_13doc2vec_inner_ONEF / __pyx_v_count); - /* "gensim/models/doc2vec_inner.pyx":489 + /* "gensim/models/doc2vec_inner.pyx":572 * count += ONEF * our_saxpy(&size, &ONEF, &_doctag_vectors[_doctag_indexes[m] * size], &ONE, _neu1, &ONE) * if count > (0.5): # <<<<<<<<<<<<<< @@ -6581,7 +6573,7 @@ static PyObject 
*__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ } - /* "gensim/models/doc2vec_inner.pyx":491 + /* "gensim/models/doc2vec_inner.pyx":574 * if count > (0.5): * inv_count = ONEF/count * if cbow_mean: # <<<<<<<<<<<<<< @@ -6591,7 +6583,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT __pyx_t_6 = (__pyx_v_cbow_mean != 0); if (__pyx_t_6) { - /* "gensim/models/doc2vec_inner.pyx":492 + /* "gensim/models/doc2vec_inner.pyx":575 * inv_count = ONEF/count * if cbow_mean: * sscal(&size, &inv_count, _neu1, &ONE) # (does this need BLAS-variants like saxpy?) # <<<<<<<<<<<<<< @@ -6600,7 +6592,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ __pyx_v_6gensim_6models_14word2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_inv_count), __pyx_v__neu1, (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE)); - /* "gensim/models/doc2vec_inner.pyx":491 + /* "gensim/models/doc2vec_inner.pyx":574 * if count > (0.5): * inv_count = ONEF/count * if cbow_mean: # <<<<<<<<<<<<<< @@ -6609,7 +6601,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ } - /* "gensim/models/doc2vec_inner.pyx":493 + /* "gensim/models/doc2vec_inner.pyx":576 * if cbow_mean: * sscal(&size, &inv_count, _neu1, &ONE) # (does this need BLAS-variants like saxpy?) * memset(_work, 0, size * cython.sizeof(REAL_t)) # work to accumulate l1 error # <<<<<<<<<<<<<< @@ -6618,7 +6610,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ memset(__pyx_v__work, 0, (__pyx_v_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); - /* "gensim/models/doc2vec_inner.pyx":494 + /* "gensim/models/doc2vec_inner.pyx":577 * sscal(&size, &inv_count, _neu1, &ONE) # (does this need BLAS-variants like saxpy?) 
* memset(_work, 0, size * cython.sizeof(REAL_t)) # work to accumulate l1 error * if hs: # <<<<<<<<<<<<<< @@ -6628,7 +6620,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT __pyx_t_6 = (__pyx_v_hs != 0); if (__pyx_t_6) { - /* "gensim/models/doc2vec_inner.pyx":495 + /* "gensim/models/doc2vec_inner.pyx":578 * memset(_work, 0, size * cython.sizeof(REAL_t)) # work to accumulate l1 error * if hs: * fast_document_dm_hs(points[i], codes[i], codelens[i], # <<<<<<<<<<<<<< @@ -6637,7 +6629,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dm_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), (__pyx_v_codelens[__pyx_v_i]), __pyx_v__neu1, __pyx_v_syn1, __pyx_v__alpha, __pyx_v__work, __pyx_v_size, __pyx_v__learn_hidden); - /* "gensim/models/doc2vec_inner.pyx":494 + /* "gensim/models/doc2vec_inner.pyx":577 * sscal(&size, &inv_count, _neu1, &ONE) # (does this need BLAS-variants like saxpy?) 
* memset(_work, 0, size * cython.sizeof(REAL_t)) # work to accumulate l1 error * if hs: # <<<<<<<<<<<<<< @@ -6646,7 +6638,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ } - /* "gensim/models/doc2vec_inner.pyx":498 + /* "gensim/models/doc2vec_inner.pyx":581 * _neu1, syn1, _alpha, _work, * size, _learn_hidden) * if negative: # <<<<<<<<<<<<<< @@ -6656,7 +6648,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT __pyx_t_6 = (__pyx_v_negative != 0); if (__pyx_t_6) { - /* "gensim/models/doc2vec_inner.pyx":499 + /* "gensim/models/doc2vec_inner.pyx":582 * size, _learn_hidden) * if negative: * next_random = fast_document_dm_neg(negative, cum_table, cum_table_len, next_random, # <<<<<<<<<<<<<< @@ -6665,7 +6657,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ __pyx_v_next_random = __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dm_neg(__pyx_v_negative, __pyx_v_cum_table, __pyx_v_cum_table_len, __pyx_v_next_random, __pyx_v__neu1, __pyx_v_syn1neg, (__pyx_v_indexes[__pyx_v_i]), __pyx_v__alpha, __pyx_v__work, __pyx_v_size, __pyx_v__learn_hidden); - /* "gensim/models/doc2vec_inner.pyx":498 + /* "gensim/models/doc2vec_inner.pyx":581 * _neu1, syn1, _alpha, _work, * size, _learn_hidden) * if negative: # <<<<<<<<<<<<<< @@ -6674,7 +6666,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ } - /* "gensim/models/doc2vec_inner.pyx":503 + /* "gensim/models/doc2vec_inner.pyx":586 * size, _learn_hidden) * * if not cbow_mean: # <<<<<<<<<<<<<< @@ -6684,7 +6676,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT __pyx_t_6 = ((!(__pyx_v_cbow_mean != 0)) != 0); if (__pyx_t_6) { - /* "gensim/models/doc2vec_inner.pyx":504 + /* "gensim/models/doc2vec_inner.pyx":587 * * if not cbow_mean: * sscal(&size, &inv_count, _work, &ONE) # (does this need BLAS-variants like saxpy?) 
# <<<<<<<<<<<<<< @@ -6693,7 +6685,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ __pyx_v_6gensim_6models_14word2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_inv_count), __pyx_v__work, (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE)); - /* "gensim/models/doc2vec_inner.pyx":503 + /* "gensim/models/doc2vec_inner.pyx":586 * size, _learn_hidden) * * if not cbow_mean: # <<<<<<<<<<<<<< @@ -6702,7 +6694,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ } - /* "gensim/models/doc2vec_inner.pyx":506 + /* "gensim/models/doc2vec_inner.pyx":589 * sscal(&size, &inv_count, _work, &ONE) # (does this need BLAS-variants like saxpy?) * # apply accumulated error in work * if _learn_doctags: # <<<<<<<<<<<<<< @@ -6712,7 +6704,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT __pyx_t_6 = (__pyx_v__learn_doctags != 0); if (__pyx_t_6) { - /* "gensim/models/doc2vec_inner.pyx":507 + /* "gensim/models/doc2vec_inner.pyx":590 * # apply accumulated error in work * if _learn_doctags: * for m in range(doctag_len): # <<<<<<<<<<<<<< @@ -6723,7 +6715,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT for (__pyx_t_19 = 0; __pyx_t_19 < __pyx_t_18; __pyx_t_19+=1) { __pyx_v_m = __pyx_t_19; - /* "gensim/models/doc2vec_inner.pyx":508 + /* "gensim/models/doc2vec_inner.pyx":591 * if _learn_doctags: * for m in range(doctag_len): * our_saxpy(&size, &_doctag_locks[_doctag_indexes[m]], _work, # <<<<<<<<<<<<<< @@ -6733,7 +6725,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v__doctag_locks[(__pyx_v__doctag_indexes[__pyx_v_m])])), __pyx_v__work, (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v__doctag_vectors[((__pyx_v__doctag_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE)); } - /* 
"gensim/models/doc2vec_inner.pyx":506 + /* "gensim/models/doc2vec_inner.pyx":589 * sscal(&size, &inv_count, _work, &ONE) # (does this need BLAS-variants like saxpy?) * # apply accumulated error in work * if _learn_doctags: # <<<<<<<<<<<<<< @@ -6742,7 +6734,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ } - /* "gensim/models/doc2vec_inner.pyx":510 + /* "gensim/models/doc2vec_inner.pyx":593 * our_saxpy(&size, &_doctag_locks[_doctag_indexes[m]], _work, * &ONE, &_doctag_vectors[_doctag_indexes[m] * size], &ONE) * if _learn_words: # <<<<<<<<<<<<<< @@ -6752,7 +6744,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT __pyx_t_6 = (__pyx_v__learn_words != 0); if (__pyx_t_6) { - /* "gensim/models/doc2vec_inner.pyx":511 + /* "gensim/models/doc2vec_inner.pyx":594 * &ONE, &_doctag_vectors[_doctag_indexes[m] * size], &ONE) * if _learn_words: * for m in range(j, k): # <<<<<<<<<<<<<< @@ -6763,7 +6755,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT for (__pyx_t_19 = __pyx_v_j; __pyx_t_19 < __pyx_t_18; __pyx_t_19+=1) { __pyx_v_m = __pyx_t_19; - /* "gensim/models/doc2vec_inner.pyx":512 + /* "gensim/models/doc2vec_inner.pyx":595 * if _learn_words: * for m in range(j, k): * if m == i: # <<<<<<<<<<<<<< @@ -6773,7 +6765,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT __pyx_t_6 = ((__pyx_v_m == __pyx_v_i) != 0); if (__pyx_t_6) { - /* "gensim/models/doc2vec_inner.pyx":513 + /* "gensim/models/doc2vec_inner.pyx":596 * for m in range(j, k): * if m == i: * continue # <<<<<<<<<<<<<< @@ -6782,7 +6774,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ goto __pyx_L47_continue; - /* "gensim/models/doc2vec_inner.pyx":512 + /* "gensim/models/doc2vec_inner.pyx":595 * if _learn_words: * for m in range(j, k): * if m == i: # <<<<<<<<<<<<<< @@ -6791,7 +6783,7 @@ static PyObject 
*__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ } - /* "gensim/models/doc2vec_inner.pyx":515 + /* "gensim/models/doc2vec_inner.pyx":598 * continue * else: * our_saxpy(&size, &_word_locks[indexes[m]], _work, &ONE, # <<<<<<<<<<<<<< @@ -6800,7 +6792,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ /*else*/ { - /* "gensim/models/doc2vec_inner.pyx":516 + /* "gensim/models/doc2vec_inner.pyx":599 * else: * our_saxpy(&size, &_word_locks[indexes[m]], _work, &ONE, * &_word_vectors[indexes[m] * size], &ONE) # <<<<<<<<<<<<<< @@ -6812,7 +6804,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT __pyx_L47_continue:; } - /* "gensim/models/doc2vec_inner.pyx":510 + /* "gensim/models/doc2vec_inner.pyx":593 * our_saxpy(&size, &_doctag_locks[_doctag_indexes[m]], _work, * &ONE, &_doctag_vectors[_doctag_indexes[m] * size], &ONE) * if _learn_words: # <<<<<<<<<<<<<< @@ -6823,7 +6815,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT } } - /* "gensim/models/doc2vec_inner.pyx":468 + /* "gensim/models/doc2vec_inner.pyx":551 * * # release GIL & train on the document * with nogil: # <<<<<<<<<<<<<< @@ -6842,7 +6834,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT } } - /* "gensim/models/doc2vec_inner.pyx":518 + /* "gensim/models/doc2vec_inner.pyx":601 * &_word_vectors[indexes[m] * size], &ONE) * * return result # <<<<<<<<<<<<<< @@ -6850,13 +6842,13 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT * */ __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 518, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 601, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_r = __pyx_t_1; __pyx_t_1 = 0; goto __pyx_L0; - /* "gensim/models/doc2vec_inner.pyx":363 + /* 
"gensim/models/doc2vec_inner.pyx":401 * * * def train_document_dm(model, doc_words, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< @@ -6890,7 +6882,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT return __pyx_r; } -/* "gensim/models/doc2vec_inner.pyx":521 +/* "gensim/models/doc2vec_inner.pyx":604 * * * def train_document_dm_concat(model, doc_words, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< @@ -6900,7 +6892,8 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT /* Python wrapper */ static PyObject *__pyx_pw_6gensim_6models_13doc2vec_inner_5train_document_dm_concat(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static PyMethodDef __pyx_mdef_6gensim_6models_13doc2vec_inner_5train_document_dm_concat = {"train_document_dm_concat", (PyCFunction)__pyx_pw_6gensim_6models_13doc2vec_inner_5train_document_dm_concat, METH_VARARGS|METH_KEYWORDS, 0}; +static char __pyx_doc_6gensim_6models_13doc2vec_inner_4train_document_dm_concat[] = "train_document_dm_concat(model, doc_words, doctag_indexes, alpha, work=None, neu1=None, learn_doctags=True, learn_words=True, learn_hidden=True, word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None)\nUpdate distributed memory model (\"PV-DM\") by training on a single document, using a concatenation of the context\n window word vectors (rather than a sum or average).\n This might be slower since the input at each batch will be significantly larger.\n\n Called internally from :meth:`~gensim.models.doc2vec.Doc2Vec.train` and\n :meth:`~gensim.models.doc2vec.Doc2Vec.infer_vector`.\n\n Parameters\n ----------\n model : :class:`~gensim.models.doc2vec.Doc2Vec`\n The model to train.\n doc_words : list of str\n The input document as a list of words to be used for training. 
Each word will be looked up in\n the model's vocabulary.\n doctag_indexes : list of int\n Indices into `doctag_vectors` used to obtain the tags of the document.\n alpha : float, optional\n Learning rate.\n work : np.ndarray, optional\n Private working memory for each worker.\n neu1 : np.ndarray, optional\n Private working memory for each worker.\n learn_doctags : bool, optional\n Whether the tag vectors should be updated.\n learn_words : bool, optional\n Word vectors will be updated exactly as per Word2Vec skip-gram training only if **both**\n `learn_words` and `train_words` are set to True.\n learn_hidden : bool, optional\n Whether or not the weights of the hidden layer will be updated.\n word_vectors : numpy.ndarray, optional\n The vector representation for each word in the vocabulary. If None, these will be retrieved from the model.\n word_locks : numpy.ndarray, optional\n A learning lock factor for each weight in the hidden layer for words, value 0 completely blocks updates,\n a value of 1 allows to update word-vectors.\n doctag_v""ectors : numpy.ndarray, optional\n Vector representations of the tags. 
If None, these will be retrieved from the model.\n doctag_locks : numpy.ndarray, optional\n The lock factors for each tag, same as `word_locks`, but for document-vectors.\n\n Returns\n -------\n int\n Number of words in the input document that were actually used for training.\n\n "; +static PyMethodDef __pyx_mdef_6gensim_6models_13doc2vec_inner_5train_document_dm_concat = {"train_document_dm_concat", (PyCFunction)__pyx_pw_6gensim_6models_13doc2vec_inner_5train_document_dm_concat, METH_VARARGS|METH_KEYWORDS, __pyx_doc_6gensim_6models_13doc2vec_inner_4train_document_dm_concat}; static PyObject *__pyx_pw_6gensim_6models_13doc2vec_inner_5train_document_dm_concat(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { PyObject *__pyx_v_model = 0; PyObject *__pyx_v_doc_words = 0; @@ -6924,23 +6917,23 @@ static PyObject *__pyx_pw_6gensim_6models_13doc2vec_inner_5train_document_dm_con values[4] = ((PyObject *)Py_None); values[5] = ((PyObject *)Py_None); - /* "gensim/models/doc2vec_inner.pyx":522 + /* "gensim/models/doc2vec_inner.pyx":605 * * def train_document_dm_concat(model, doc_words, doctag_indexes, alpha, work=None, neu1=None, * learn_doctags=True, learn_words=True, learn_hidden=True, # <<<<<<<<<<<<<< * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): - * cdef int hs = model.hs + * """Update distributed memory model ("PV-DM") by training on a single document, using a concatenation of the context */ values[6] = ((PyObject *)Py_True); values[7] = ((PyObject *)Py_True); values[8] = ((PyObject *)Py_True); - /* "gensim/models/doc2vec_inner.pyx":523 + /* "gensim/models/doc2vec_inner.pyx":606 * def train_document_dm_concat(model, doc_words, doctag_indexes, alpha, work=None, neu1=None, * learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): # <<<<<<<<<<<<<< - * cdef int hs = model.hs - * cdef int negative = model.negative + * """Update distributed memory 
model ("PV-DM") by training on a single document, using a concatenation of the context + * window word vectors (rather than a sum or average). */ values[9] = ((PyObject *)Py_None); values[10] = ((PyObject *)Py_None); @@ -6988,19 +6981,19 @@ static PyObject *__pyx_pw_6gensim_6models_13doc2vec_inner_5train_document_dm_con case 1: if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_doc_words)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_document_dm_concat", 0, 4, 13, 1); __PYX_ERR(0, 521, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("train_document_dm_concat", 0, 4, 13, 1); __PYX_ERR(0, 604, __pyx_L3_error) } CYTHON_FALLTHROUGH; case 2: if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_doctag_indexes)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_document_dm_concat", 0, 4, 13, 2); __PYX_ERR(0, 521, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("train_document_dm_concat", 0, 4, 13, 2); __PYX_ERR(0, 604, __pyx_L3_error) } CYTHON_FALLTHROUGH; case 3: if (likely((values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_alpha)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_document_dm_concat", 0, 4, 13, 3); __PYX_ERR(0, 521, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("train_document_dm_concat", 0, 4, 13, 3); __PYX_ERR(0, 604, __pyx_L3_error) } CYTHON_FALLTHROUGH; case 4: @@ -7058,7 +7051,7 @@ static PyObject *__pyx_pw_6gensim_6models_13doc2vec_inner_5train_document_dm_con } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_document_dm_concat") < 0)) __PYX_ERR(0, 521, __pyx_L3_error) + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_document_dm_concat") < 0)) __PYX_ERR(0, 604, __pyx_L3_error) } } else { switch (PyTuple_GET_SIZE(__pyx_args)) { @@ -7104,7 +7097,7 @@ static PyObject *__pyx_pw_6gensim_6models_13doc2vec_inner_5train_document_dm_con } goto __pyx_L4_argument_unpacking_done; 
__pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("train_document_dm_concat", 0, 4, 13, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 521, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("train_document_dm_concat", 0, 4, 13, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 604, __pyx_L3_error) __pyx_L3_error:; __Pyx_AddTraceback("gensim.models.doc2vec_inner.train_document_dm_concat", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); @@ -7112,7 +7105,7 @@ static PyObject *__pyx_pw_6gensim_6models_13doc2vec_inner_5train_document_dm_con __pyx_L4_argument_unpacking_done:; __pyx_r = __pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_concat(__pyx_self, __pyx_v_model, __pyx_v_doc_words, __pyx_v_doctag_indexes, __pyx_v_alpha, __pyx_v_work, __pyx_v_neu1, __pyx_v_learn_doctags, __pyx_v_learn_words, __pyx_v_learn_hidden, __pyx_v_word_vectors, __pyx_v_word_locks, __pyx_v_doctag_vectors, __pyx_v_doctag_locks); - /* "gensim/models/doc2vec_inner.pyx":521 + /* "gensim/models/doc2vec_inner.pyx":604 * * * def train_document_dm_concat(model, doc_words, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< @@ -7193,149 +7186,149 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con __Pyx_INCREF(__pyx_v_doctag_vectors); __Pyx_INCREF(__pyx_v_doctag_locks); - /* "gensim/models/doc2vec_inner.pyx":524 - * learn_doctags=True, learn_words=True, learn_hidden=True, - * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): + /* "gensim/models/doc2vec_inner.pyx":652 + * + * """ * cdef int hs = model.hs # <<<<<<<<<<<<<< * cdef int negative = model.negative * cdef int sample = (model.vocabulary.sample != 0) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 524, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 652, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = 
__Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 524, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 652, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_hs = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":525 - * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): + /* "gensim/models/doc2vec_inner.pyx":653 + * """ * cdef int hs = model.hs * cdef int negative = model.negative # <<<<<<<<<<<<<< * cdef int sample = (model.vocabulary.sample != 0) * cdef int _learn_doctags = learn_doctags */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 525, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 653, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 525, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 653, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_negative = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":526 + /* "gensim/models/doc2vec_inner.pyx":654 * cdef int hs = model.hs * cdef int negative = model.negative * cdef int sample = (model.vocabulary.sample != 0) # <<<<<<<<<<<<<< * cdef int _learn_doctags = learn_doctags * cdef int _learn_words = learn_words */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocabulary); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 526, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocabulary); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 654, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_sample); if 
(unlikely(!__pyx_t_3)) __PYX_ERR(0, 526, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_sample); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 654, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyObject_RichCompare(__pyx_t_3, __pyx_int_0, Py_NE); __Pyx_XGOTREF(__pyx_t_1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 526, __pyx_L1_error) + __pyx_t_1 = PyObject_RichCompare(__pyx_t_3, __pyx_int_0, Py_NE); __Pyx_XGOTREF(__pyx_t_1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 654, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 526, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 654, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_sample = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":527 + /* "gensim/models/doc2vec_inner.pyx":655 * cdef int negative = model.negative * cdef int sample = (model.vocabulary.sample != 0) * cdef int _learn_doctags = learn_doctags # <<<<<<<<<<<<<< * cdef int _learn_words = learn_words * cdef int _learn_hidden = learn_hidden */ - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_doctags); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 527, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_doctags); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 655, __pyx_L1_error) __pyx_v__learn_doctags = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":528 + /* "gensim/models/doc2vec_inner.pyx":656 * cdef int sample = (model.vocabulary.sample != 0) * cdef int _learn_doctags = learn_doctags * cdef int _learn_words = learn_words # <<<<<<<<<<<<<< * cdef int _learn_hidden = learn_hidden * */ - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_words); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 528, 
__pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_words); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 656, __pyx_L1_error) __pyx_v__learn_words = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":529 + /* "gensim/models/doc2vec_inner.pyx":657 * cdef int _learn_doctags = learn_doctags * cdef int _learn_words = learn_words * cdef int _learn_hidden = learn_hidden # <<<<<<<<<<<<<< * * cdef REAL_t *_word_vectors */ - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_hidden); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 529, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_hidden); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 657, __pyx_L1_error) __pyx_v__learn_hidden = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":537 + /* "gensim/models/doc2vec_inner.pyx":665 * cdef REAL_t *_work * cdef REAL_t *_neu1 * cdef REAL_t _alpha = alpha # <<<<<<<<<<<<<< * cdef int layer1_size = model.trainables.layer1_size * cdef int vector_size = model.docvecs.vector_size */ - __pyx_t_4 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_4 == ((npy_float32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 537, __pyx_L1_error) + __pyx_t_4 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_4 == ((npy_float32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 665, __pyx_L1_error) __pyx_v__alpha = __pyx_t_4; - /* "gensim/models/doc2vec_inner.pyx":538 + /* "gensim/models/doc2vec_inner.pyx":666 * cdef REAL_t *_neu1 * cdef REAL_t _alpha = alpha * cdef int layer1_size = model.trainables.layer1_size # <<<<<<<<<<<<<< * cdef int vector_size = model.docvecs.vector_size * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 538, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 666, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = 
__Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 538, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 666, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 538, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 666, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_layer1_size = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":539 + /* "gensim/models/doc2vec_inner.pyx":667 * cdef REAL_t _alpha = alpha * cdef int layer1_size = model.trainables.layer1_size * cdef int vector_size = model.docvecs.vector_size # <<<<<<<<<<<<<< * * cdef int codelens[MAX_DOCUMENT_LEN] */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_docvecs); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 539, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_docvecs); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 667, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_vector_size); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 539, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_vector_size); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 667, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 539, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 667, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_vector_size = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":547 + /* 
"gensim/models/doc2vec_inner.pyx":675 * cdef int document_len * cdef int doctag_len * cdef int window = model.window # <<<<<<<<<<<<<< * cdef int expected_doctag_len = model.dm_tag_count * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 547, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 675, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 547, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 675, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_window = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":548 + /* "gensim/models/doc2vec_inner.pyx":676 * cdef int doctag_len * cdef int window = model.window * cdef int expected_doctag_len = model.dm_tag_count # <<<<<<<<<<<<<< * * cdef int i, j, k, m, n */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_dm_tag_count); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 548, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_dm_tag_count); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 676, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 548, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 676, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_expected_doctag_len = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":551 + /* "gensim/models/doc2vec_inner.pyx":679 * * cdef int i, j, k, m, n * cdef long result = 0 # <<<<<<<<<<<<<< @@ -7344,36 +7337,36 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con */ 
__pyx_v_result = 0; - /* "gensim/models/doc2vec_inner.pyx":552 + /* "gensim/models/doc2vec_inner.pyx":680 * cdef int i, j, k, m, n * cdef long result = 0 * cdef int null_word_index = model.wv.vocab['\0'].index # <<<<<<<<<<<<<< * * # For hierarchical softmax */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 552, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 680, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_vocab); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 552, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_vocab); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 680, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyObject_GetItem(__pyx_t_3, __pyx_kp_s__5); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 552, __pyx_L1_error) + __pyx_t_1 = PyObject_GetItem(__pyx_t_3, __pyx_kp_s__5); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 680, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_index); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 552, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_index); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 680, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 552, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 680, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_null_word_index = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":565 + /* "gensim/models/doc2vec_inner.pyx":693 * cdef unsigned long long next_random * * doctag_len = 
min(MAX_DOCUMENT_LEN, len(doctag_indexes)) # <<<<<<<<<<<<<< * if doctag_len != expected_doctag_len: * return 0 # skip doc without expected number of tags */ - __pyx_t_5 = PyObject_Length(__pyx_v_doctag_indexes); if (unlikely(__pyx_t_5 == ((Py_ssize_t)-1))) __PYX_ERR(0, 565, __pyx_L1_error) + __pyx_t_5 = PyObject_Length(__pyx_v_doctag_indexes); if (unlikely(__pyx_t_5 == ((Py_ssize_t)-1))) __PYX_ERR(0, 693, __pyx_L1_error) __pyx_t_6 = 0x2710; if (((__pyx_t_5 < __pyx_t_6) != 0)) { __pyx_t_7 = __pyx_t_5; @@ -7382,7 +7375,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con } __pyx_v_doctag_len = ((int)__pyx_t_7); - /* "gensim/models/doc2vec_inner.pyx":566 + /* "gensim/models/doc2vec_inner.pyx":694 * * doctag_len = min(MAX_DOCUMENT_LEN, len(doctag_indexes)) * if doctag_len != expected_doctag_len: # <<<<<<<<<<<<<< @@ -7392,7 +7385,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con __pyx_t_8 = ((__pyx_v_doctag_len != __pyx_v_expected_doctag_len) != 0); if (__pyx_t_8) { - /* "gensim/models/doc2vec_inner.pyx":567 + /* "gensim/models/doc2vec_inner.pyx":695 * doctag_len = min(MAX_DOCUMENT_LEN, len(doctag_indexes)) * if doctag_len != expected_doctag_len: * return 0 # skip doc without expected number of tags # <<<<<<<<<<<<<< @@ -7404,7 +7397,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con __pyx_r = __pyx_int_0; goto __pyx_L0; - /* "gensim/models/doc2vec_inner.pyx":566 + /* "gensim/models/doc2vec_inner.pyx":694 * * doctag_len = min(MAX_DOCUMENT_LEN, len(doctag_indexes)) * if doctag_len != expected_doctag_len: # <<<<<<<<<<<<<< @@ -7413,7 +7406,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con */ } - /* "gensim/models/doc2vec_inner.pyx":570 + /* "gensim/models/doc2vec_inner.pyx":698 * * # default vectors, locks from syn0/doctag_syn0 * if word_vectors is None: # <<<<<<<<<<<<<< @@ -7424,22 +7417,22 @@ static PyObject 
*__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con __pyx_t_9 = (__pyx_t_8 != 0); if (__pyx_t_9) { - /* "gensim/models/doc2vec_inner.pyx":571 + /* "gensim/models/doc2vec_inner.pyx":699 * # default vectors, locks from syn0/doctag_syn0 * if word_vectors is None: * word_vectors = model.wv.vectors # <<<<<<<<<<<<<< * _word_vectors = (np.PyArray_DATA(word_vectors)) * if doctag_vectors is None: */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 571, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 699, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_vectors); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 571, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_vectors); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 699, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_DECREF_SET(__pyx_v_word_vectors, __pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/doc2vec_inner.pyx":570 + /* "gensim/models/doc2vec_inner.pyx":698 * * # default vectors, locks from syn0/doctag_syn0 * if word_vectors is None: # <<<<<<<<<<<<<< @@ -7448,17 +7441,17 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con */ } - /* "gensim/models/doc2vec_inner.pyx":572 + /* "gensim/models/doc2vec_inner.pyx":700 * if word_vectors is None: * word_vectors = model.wv.vectors * _word_vectors = (np.PyArray_DATA(word_vectors)) # <<<<<<<<<<<<<< * if doctag_vectors is None: * doctag_vectors = model.docvecs.vectors_docs */ - if (!(likely(((__pyx_v_word_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_vectors, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 572, __pyx_L1_error) + if (!(likely(((__pyx_v_word_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_vectors, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 700, __pyx_L1_error) 
__pyx_v__word_vectors = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_word_vectors))); - /* "gensim/models/doc2vec_inner.pyx":573 + /* "gensim/models/doc2vec_inner.pyx":701 * word_vectors = model.wv.vectors * _word_vectors = (np.PyArray_DATA(word_vectors)) * if doctag_vectors is None: # <<<<<<<<<<<<<< @@ -7469,22 +7462,22 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con __pyx_t_8 = (__pyx_t_9 != 0); if (__pyx_t_8) { - /* "gensim/models/doc2vec_inner.pyx":574 + /* "gensim/models/doc2vec_inner.pyx":702 * _word_vectors = (np.PyArray_DATA(word_vectors)) * if doctag_vectors is None: * doctag_vectors = model.docvecs.vectors_docs # <<<<<<<<<<<<<< * _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) * if word_locks is None: */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_docvecs); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 574, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_docvecs); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 702, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_vectors_docs); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 574, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_vectors_docs); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 702, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF_SET(__pyx_v_doctag_vectors, __pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/doc2vec_inner.pyx":573 + /* "gensim/models/doc2vec_inner.pyx":701 * word_vectors = model.wv.vectors * _word_vectors = (np.PyArray_DATA(word_vectors)) * if doctag_vectors is None: # <<<<<<<<<<<<<< @@ -7493,17 +7486,17 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con */ } - /* "gensim/models/doc2vec_inner.pyx":575 + /* "gensim/models/doc2vec_inner.pyx":703 * if doctag_vectors is None: * doctag_vectors = model.docvecs.vectors_docs * 
_doctag_vectors = (np.PyArray_DATA(doctag_vectors)) # <<<<<<<<<<<<<< * if word_locks is None: * word_locks = model.trainables.vectors_lockf */ - if (!(likely(((__pyx_v_doctag_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doctag_vectors, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 575, __pyx_L1_error) + if (!(likely(((__pyx_v_doctag_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doctag_vectors, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 703, __pyx_L1_error) __pyx_v__doctag_vectors = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_doctag_vectors))); - /* "gensim/models/doc2vec_inner.pyx":576 + /* "gensim/models/doc2vec_inner.pyx":704 * doctag_vectors = model.docvecs.vectors_docs * _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) * if word_locks is None: # <<<<<<<<<<<<<< @@ -7514,22 +7507,22 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con __pyx_t_9 = (__pyx_t_8 != 0); if (__pyx_t_9) { - /* "gensim/models/doc2vec_inner.pyx":577 + /* "gensim/models/doc2vec_inner.pyx":705 * _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) * if word_locks is None: * word_locks = model.trainables.vectors_lockf # <<<<<<<<<<<<<< * _word_locks = (np.PyArray_DATA(word_locks)) * if doctag_locks is None: */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 577, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 705, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_vectors_lockf); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 577, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_vectors_lockf); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 705, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_DECREF_SET(__pyx_v_word_locks, __pyx_t_1); __pyx_t_1 = 
0; - /* "gensim/models/doc2vec_inner.pyx":576 + /* "gensim/models/doc2vec_inner.pyx":704 * doctag_vectors = model.docvecs.vectors_docs * _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) * if word_locks is None: # <<<<<<<<<<<<<< @@ -7538,17 +7531,17 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con */ } - /* "gensim/models/doc2vec_inner.pyx":578 + /* "gensim/models/doc2vec_inner.pyx":706 * if word_locks is None: * word_locks = model.trainables.vectors_lockf * _word_locks = (np.PyArray_DATA(word_locks)) # <<<<<<<<<<<<<< * if doctag_locks is None: * doctag_locks = model.trainables.vectors_docs_lockf */ - if (!(likely(((__pyx_v_word_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_locks, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 578, __pyx_L1_error) + if (!(likely(((__pyx_v_word_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_locks, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 706, __pyx_L1_error) __pyx_v__word_locks = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_word_locks))); - /* "gensim/models/doc2vec_inner.pyx":579 + /* "gensim/models/doc2vec_inner.pyx":707 * word_locks = model.trainables.vectors_lockf * _word_locks = (np.PyArray_DATA(word_locks)) * if doctag_locks is None: # <<<<<<<<<<<<<< @@ -7559,22 +7552,22 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con __pyx_t_8 = (__pyx_t_9 != 0); if (__pyx_t_8) { - /* "gensim/models/doc2vec_inner.pyx":580 + /* "gensim/models/doc2vec_inner.pyx":708 * _word_locks = (np.PyArray_DATA(word_locks)) * if doctag_locks is None: * doctag_locks = model.trainables.vectors_docs_lockf # <<<<<<<<<<<<<< * _doctag_locks = (np.PyArray_DATA(doctag_locks)) * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 580, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_1)) 
__PYX_ERR(0, 708, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_vectors_docs_lockf); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 580, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_vectors_docs_lockf); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 708, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF_SET(__pyx_v_doctag_locks, __pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/doc2vec_inner.pyx":579 + /* "gensim/models/doc2vec_inner.pyx":707 * word_locks = model.trainables.vectors_lockf * _word_locks = (np.PyArray_DATA(word_locks)) * if doctag_locks is None: # <<<<<<<<<<<<<< @@ -7583,17 +7576,17 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con */ } - /* "gensim/models/doc2vec_inner.pyx":581 + /* "gensim/models/doc2vec_inner.pyx":709 * if doctag_locks is None: * doctag_locks = model.trainables.vectors_docs_lockf * _doctag_locks = (np.PyArray_DATA(doctag_locks)) # <<<<<<<<<<<<<< * * if hs: */ - if (!(likely(((__pyx_v_doctag_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doctag_locks, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 581, __pyx_L1_error) + if (!(likely(((__pyx_v_doctag_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doctag_locks, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 709, __pyx_L1_error) __pyx_v__doctag_locks = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_doctag_locks))); - /* "gensim/models/doc2vec_inner.pyx":583 + /* "gensim/models/doc2vec_inner.pyx":711 * _doctag_locks = (np.PyArray_DATA(doctag_locks)) * * if hs: # <<<<<<<<<<<<<< @@ -7603,23 +7596,23 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con __pyx_t_8 = (__pyx_v_hs != 0); if (__pyx_t_8) { - /* "gensim/models/doc2vec_inner.pyx":584 + /* "gensim/models/doc2vec_inner.pyx":712 * * if hs: * syn1 = (np.PyArray_DATA(model.trainables.syn1)) # <<<<<<<<<<<<<< * * 
if negative: */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 584, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 712, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 584, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 712, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 584, __pyx_L1_error) + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 712, __pyx_L1_error) __pyx_v_syn1 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/doc2vec_inner.pyx":583 + /* "gensim/models/doc2vec_inner.pyx":711 * _doctag_locks = (np.PyArray_DATA(doctag_locks)) * * if hs: # <<<<<<<<<<<<<< @@ -7628,7 +7621,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con */ } - /* "gensim/models/doc2vec_inner.pyx":586 + /* "gensim/models/doc2vec_inner.pyx":714 * syn1 = (np.PyArray_DATA(model.trainables.syn1)) * * if negative: # <<<<<<<<<<<<<< @@ -7638,55 +7631,55 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con __pyx_t_8 = (__pyx_v_negative != 0); if (__pyx_t_8) { - /* "gensim/models/doc2vec_inner.pyx":587 + /* "gensim/models/doc2vec_inner.pyx":715 * * if negative: * syn1neg = (np.PyArray_DATA(model.trainables.syn1neg)) # <<<<<<<<<<<<<< * cum_table = (np.PyArray_DATA(model.vocabulary.cum_table)) * cum_table_len = len(model.vocabulary.cum_table) */ - __pyx_t_1 = 
__Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 587, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 715, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 587, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 715, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 587, __pyx_L1_error) + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 715, __pyx_L1_error) __pyx_v_syn1neg = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/doc2vec_inner.pyx":588 + /* "gensim/models/doc2vec_inner.pyx":716 * if negative: * syn1neg = (np.PyArray_DATA(model.trainables.syn1neg)) * cum_table = (np.PyArray_DATA(model.vocabulary.cum_table)) # <<<<<<<<<<<<<< * cum_table_len = len(model.vocabulary.cum_table) * if negative or sample: */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocabulary); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 588, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocabulary); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 716, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 588, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 716, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - 
if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 588, __pyx_L1_error) + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 716, __pyx_L1_error) __pyx_v_cum_table = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/doc2vec_inner.pyx":589 + /* "gensim/models/doc2vec_inner.pyx":717 * syn1neg = (np.PyArray_DATA(model.trainables.syn1neg)) * cum_table = (np.PyArray_DATA(model.vocabulary.cum_table)) * cum_table_len = len(model.vocabulary.cum_table) # <<<<<<<<<<<<<< * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocabulary); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 589, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocabulary); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 717, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 589, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 717, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_7 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_7 == ((Py_ssize_t)-1))) __PYX_ERR(0, 589, __pyx_L1_error) + __pyx_t_7 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_7 == ((Py_ssize_t)-1))) __PYX_ERR(0, 717, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_cum_table_len = __pyx_t_7; - /* "gensim/models/doc2vec_inner.pyx":586 + /* "gensim/models/doc2vec_inner.pyx":714 * syn1 = (np.PyArray_DATA(model.trainables.syn1)) * * if negative: # <<<<<<<<<<<<<< @@ -7695,7 +7688,7 @@ static PyObject 
*__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con */ } - /* "gensim/models/doc2vec_inner.pyx":590 + /* "gensim/models/doc2vec_inner.pyx":718 * cum_table = (np.PyArray_DATA(model.vocabulary.cum_table)) * cum_table_len = len(model.vocabulary.cum_table) * if negative or sample: # <<<<<<<<<<<<<< @@ -7713,41 +7706,41 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con __pyx_L11_bool_binop_done:; if (__pyx_t_8) { - /* "gensim/models/doc2vec_inner.pyx":591 + /* "gensim/models/doc2vec_inner.pyx":719 * cum_table_len = len(model.vocabulary.cum_table) * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 591, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 719, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 591, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 719, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple__6, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 591, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple__6, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 719, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 591, __pyx_L1_error) + __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 719, __pyx_L1_error) 
__Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 591, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 719, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 591, __pyx_L1_error) + __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 719, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_10); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_10, __pyx_tuple__7, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 591, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_10, __pyx_tuple__7, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 719, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - __pyx_t_10 = PyNumber_Add(__pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 591, __pyx_L1_error) + __pyx_t_10 = PyNumber_Add(__pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 719, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_10); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_11 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_10); if (unlikely((__pyx_t_11 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) __PYX_ERR(0, 591, __pyx_L1_error) + __pyx_t_11 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_10); if (unlikely((__pyx_t_11 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) __PYX_ERR(0, 719, __pyx_L1_error) __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; __pyx_v_next_random = __pyx_t_11; - /* "gensim/models/doc2vec_inner.pyx":590 + /* "gensim/models/doc2vec_inner.pyx":718 * cum_table = (np.PyArray_DATA(model.vocabulary.cum_table)) * cum_table_len = len(model.vocabulary.cum_table) * if negative or sample: # 
<<<<<<<<<<<<<< @@ -7756,7 +7749,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con */ } - /* "gensim/models/doc2vec_inner.pyx":594 + /* "gensim/models/doc2vec_inner.pyx":722 * * # convert Python structures to primitive types, so we can release the GIL * if work is None: # <<<<<<<<<<<<<< @@ -7767,32 +7760,32 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con __pyx_t_9 = (__pyx_t_8 != 0); if (__pyx_t_9) { - /* "gensim/models/doc2vec_inner.pyx":595 + /* "gensim/models/doc2vec_inner.pyx":723 * # convert Python structures to primitive types, so we can release the GIL * if work is None: * work = zeros(model.trainables.layer1_size, dtype=REAL) # <<<<<<<<<<<<<< * _work = np.PyArray_DATA(work) * if neu1 is None: */ - __pyx_t_10 = __Pyx_GetModuleGlobalName(__pyx_n_s_zeros); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 595, __pyx_L1_error) + __pyx_t_10 = __Pyx_GetModuleGlobalName(__pyx_n_s_zeros); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 723, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_10); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 595, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 723, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 595, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 723, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 595, __pyx_L1_error) + __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 723, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_GIVEREF(__pyx_t_1); PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = 
__Pyx_PyDict_NewPresized(1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 595, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyDict_NewPresized(1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 723, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_12 = __Pyx_GetModuleGlobalName(__pyx_n_s_REAL); if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 595, __pyx_L1_error) + __pyx_t_12 = __Pyx_GetModuleGlobalName(__pyx_n_s_REAL); if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 723, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_12); - if (PyDict_SetItem(__pyx_t_1, __pyx_n_s_dtype, __pyx_t_12) < 0) __PYX_ERR(0, 595, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_1, __pyx_n_s_dtype, __pyx_t_12) < 0) __PYX_ERR(0, 723, __pyx_L1_error) __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0; - __pyx_t_12 = __Pyx_PyObject_Call(__pyx_t_10, __pyx_t_3, __pyx_t_1); if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 595, __pyx_L1_error) + __pyx_t_12 = __Pyx_PyObject_Call(__pyx_t_10, __pyx_t_3, __pyx_t_1); if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 723, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_12); __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; @@ -7800,7 +7793,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con __Pyx_DECREF_SET(__pyx_v_work, __pyx_t_12); __pyx_t_12 = 0; - /* "gensim/models/doc2vec_inner.pyx":594 + /* "gensim/models/doc2vec_inner.pyx":722 * * # convert Python structures to primitive types, so we can release the GIL * if work is None: # <<<<<<<<<<<<<< @@ -7809,17 +7802,17 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con */ } - /* "gensim/models/doc2vec_inner.pyx":596 + /* "gensim/models/doc2vec_inner.pyx":724 * if work is None: * work = zeros(model.trainables.layer1_size, dtype=REAL) * _work = np.PyArray_DATA(work) # <<<<<<<<<<<<<< * if neu1 is None: * neu1 = zeros(model.trainables.layer1_size, dtype=REAL) */ - if (!(likely(((__pyx_v_work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_work, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 596, 
__pyx_L1_error) + if (!(likely(((__pyx_v_work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_work, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 724, __pyx_L1_error) __pyx_v__work = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_work))); - /* "gensim/models/doc2vec_inner.pyx":597 + /* "gensim/models/doc2vec_inner.pyx":725 * work = zeros(model.trainables.layer1_size, dtype=REAL) * _work = np.PyArray_DATA(work) * if neu1 is None: # <<<<<<<<<<<<<< @@ -7830,32 +7823,32 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con __pyx_t_8 = (__pyx_t_9 != 0); if (__pyx_t_8) { - /* "gensim/models/doc2vec_inner.pyx":598 + /* "gensim/models/doc2vec_inner.pyx":726 * _work = np.PyArray_DATA(work) * if neu1 is None: * neu1 = zeros(model.trainables.layer1_size, dtype=REAL) # <<<<<<<<<<<<<< * _neu1 = np.PyArray_DATA(neu1) * */ - __pyx_t_12 = __Pyx_GetModuleGlobalName(__pyx_n_s_zeros); if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 598, __pyx_L1_error) + __pyx_t_12 = __Pyx_GetModuleGlobalName(__pyx_n_s_zeros); if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 726, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_12); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 598, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_trainables); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 726, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 598, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 726, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 598, __pyx_L1_error) + __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 726, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); 
__Pyx_GIVEREF(__pyx_t_3); PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_PyDict_NewPresized(1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 598, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyDict_NewPresized(1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 726, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_10 = __Pyx_GetModuleGlobalName(__pyx_n_s_REAL); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 598, __pyx_L1_error) + __pyx_t_10 = __Pyx_GetModuleGlobalName(__pyx_n_s_REAL); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 726, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_10); - if (PyDict_SetItem(__pyx_t_3, __pyx_n_s_dtype, __pyx_t_10) < 0) __PYX_ERR(0, 598, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_3, __pyx_n_s_dtype, __pyx_t_10) < 0) __PYX_ERR(0, 726, __pyx_L1_error) __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - __pyx_t_10 = __Pyx_PyObject_Call(__pyx_t_12, __pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 598, __pyx_L1_error) + __pyx_t_10 = __Pyx_PyObject_Call(__pyx_t_12, __pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 726, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_10); __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; @@ -7863,7 +7856,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con __Pyx_DECREF_SET(__pyx_v_neu1, __pyx_t_10); __pyx_t_10 = 0; - /* "gensim/models/doc2vec_inner.pyx":597 + /* "gensim/models/doc2vec_inner.pyx":725 * work = zeros(model.trainables.layer1_size, dtype=REAL) * _work = np.PyArray_DATA(work) * if neu1 is None: # <<<<<<<<<<<<<< @@ -7872,32 +7865,32 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con */ } - /* "gensim/models/doc2vec_inner.pyx":599 + /* "gensim/models/doc2vec_inner.pyx":727 * if neu1 is None: * neu1 = zeros(model.trainables.layer1_size, dtype=REAL) * _neu1 = np.PyArray_DATA(neu1) # <<<<<<<<<<<<<< * * vlookup = model.wv.vocab */ - if (!(likely(((__pyx_v_neu1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_neu1, 
__pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 599, __pyx_L1_error) + if (!(likely(((__pyx_v_neu1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_neu1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 727, __pyx_L1_error) __pyx_v__neu1 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_neu1))); - /* "gensim/models/doc2vec_inner.pyx":601 + /* "gensim/models/doc2vec_inner.pyx":729 * _neu1 = np.PyArray_DATA(neu1) * * vlookup = model.wv.vocab # <<<<<<<<<<<<<< * i = 0 * for token in doc_words: */ - __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 601, __pyx_L1_error) + __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 729, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_10); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_10, __pyx_n_s_vocab); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 601, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_10, __pyx_n_s_vocab); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 729, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; __pyx_v_vlookup = __pyx_t_3; __pyx_t_3 = 0; - /* "gensim/models/doc2vec_inner.pyx":602 + /* "gensim/models/doc2vec_inner.pyx":730 * * vlookup = model.wv.vocab * i = 0 # <<<<<<<<<<<<<< @@ -7906,7 +7899,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con */ __pyx_v_i = 0; - /* "gensim/models/doc2vec_inner.pyx":603 + /* "gensim/models/doc2vec_inner.pyx":731 * vlookup = model.wv.vocab * i = 0 * for token in doc_words: # <<<<<<<<<<<<<< @@ -7917,26 +7910,26 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con __pyx_t_3 = __pyx_v_doc_words; __Pyx_INCREF(__pyx_t_3); __pyx_t_7 = 0; __pyx_t_13 = NULL; } else { - __pyx_t_7 = -1; __pyx_t_3 = PyObject_GetIter(__pyx_v_doc_words); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 603, __pyx_L1_error) + __pyx_t_7 = -1; __pyx_t_3 = 
PyObject_GetIter(__pyx_v_doc_words); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 731, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_13 = Py_TYPE(__pyx_t_3)->tp_iternext; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 603, __pyx_L1_error) + __pyx_t_13 = Py_TYPE(__pyx_t_3)->tp_iternext; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 731, __pyx_L1_error) } for (;;) { if (likely(!__pyx_t_13)) { if (likely(PyList_CheckExact(__pyx_t_3))) { if (__pyx_t_7 >= PyList_GET_SIZE(__pyx_t_3)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_10 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_7); __Pyx_INCREF(__pyx_t_10); __pyx_t_7++; if (unlikely(0 < 0)) __PYX_ERR(0, 603, __pyx_L1_error) + __pyx_t_10 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_7); __Pyx_INCREF(__pyx_t_10); __pyx_t_7++; if (unlikely(0 < 0)) __PYX_ERR(0, 731, __pyx_L1_error) #else - __pyx_t_10 = PySequence_ITEM(__pyx_t_3, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 603, __pyx_L1_error) + __pyx_t_10 = PySequence_ITEM(__pyx_t_3, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 731, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_10); #endif } else { if (__pyx_t_7 >= PyTuple_GET_SIZE(__pyx_t_3)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_10 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_7); __Pyx_INCREF(__pyx_t_10); __pyx_t_7++; if (unlikely(0 < 0)) __PYX_ERR(0, 603, __pyx_L1_error) + __pyx_t_10 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_7); __Pyx_INCREF(__pyx_t_10); __pyx_t_7++; if (unlikely(0 < 0)) __PYX_ERR(0, 731, __pyx_L1_error) #else - __pyx_t_10 = PySequence_ITEM(__pyx_t_3, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 603, __pyx_L1_error) + __pyx_t_10 = PySequence_ITEM(__pyx_t_3, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 731, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_10); #endif } @@ -7946,7 +7939,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con PyObject* exc_type = PyErr_Occurred(); if 
(exc_type) { if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 603, __pyx_L1_error) + else __PYX_ERR(0, 731, __pyx_L1_error) } break; } @@ -7955,16 +7948,16 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con __Pyx_XDECREF_SET(__pyx_v_token, __pyx_t_10); __pyx_t_10 = 0; - /* "gensim/models/doc2vec_inner.pyx":604 + /* "gensim/models/doc2vec_inner.pyx":732 * i = 0 * for token in doc_words: * predict_word = vlookup[token] if token in vlookup else None # <<<<<<<<<<<<<< * if predict_word is None: # shrink document to leave out word * continue # leaving i unchanged */ - __pyx_t_8 = (__Pyx_PySequence_ContainsTF(__pyx_v_token, __pyx_v_vlookup, Py_EQ)); if (unlikely(__pyx_t_8 < 0)) __PYX_ERR(0, 604, __pyx_L1_error) + __pyx_t_8 = (__Pyx_PySequence_ContainsTF(__pyx_v_token, __pyx_v_vlookup, Py_EQ)); if (unlikely(__pyx_t_8 < 0)) __PYX_ERR(0, 732, __pyx_L1_error) if ((__pyx_t_8 != 0)) { - __pyx_t_1 = PyObject_GetItem(__pyx_v_vlookup, __pyx_v_token); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 604, __pyx_L1_error) + __pyx_t_1 = PyObject_GetItem(__pyx_v_vlookup, __pyx_v_token); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 732, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_t_10 = __pyx_t_1; __pyx_t_1 = 0; @@ -7975,7 +7968,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con __Pyx_XDECREF_SET(__pyx_v_predict_word, __pyx_t_10); __pyx_t_10 = 0; - /* "gensim/models/doc2vec_inner.pyx":605 + /* "gensim/models/doc2vec_inner.pyx":733 * for token in doc_words: * predict_word = vlookup[token] if token in vlookup else None * if predict_word is None: # shrink document to leave out word # <<<<<<<<<<<<<< @@ -7986,7 +7979,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con __pyx_t_9 = (__pyx_t_8 != 0); if (__pyx_t_9) { - /* "gensim/models/doc2vec_inner.pyx":606 + /* "gensim/models/doc2vec_inner.pyx":734 * predict_word = vlookup[token] if token in 
vlookup else None * if predict_word is None: # shrink document to leave out word * continue # leaving i unchanged # <<<<<<<<<<<<<< @@ -7995,7 +7988,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con */ goto __pyx_L15_continue; - /* "gensim/models/doc2vec_inner.pyx":605 + /* "gensim/models/doc2vec_inner.pyx":733 * for token in doc_words: * predict_word = vlookup[token] if token in vlookup else None * if predict_word is None: # shrink document to leave out word # <<<<<<<<<<<<<< @@ -8004,7 +7997,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con */ } - /* "gensim/models/doc2vec_inner.pyx":607 + /* "gensim/models/doc2vec_inner.pyx":735 * if predict_word is None: # shrink document to leave out word * continue # leaving i unchanged * if sample and predict_word.sample_int < random_int32(&next_random): # <<<<<<<<<<<<<< @@ -8017,20 +8010,20 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con __pyx_t_9 = __pyx_t_8; goto __pyx_L19_bool_binop_done; } - __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_sample_int); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 607, __pyx_L1_error) + __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_sample_int); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 735, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_10); - __pyx_t_1 = __Pyx_PyInt_From_unsigned_PY_LONG_LONG(__pyx_f_6gensim_6models_14word2vec_inner_random_int32((&__pyx_v_next_random))); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 607, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyInt_From_unsigned_PY_LONG_LONG(__pyx_f_6gensim_6models_14word2vec_inner_random_int32((&__pyx_v_next_random))); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 735, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_12 = PyObject_RichCompare(__pyx_t_10, __pyx_t_1, Py_LT); __Pyx_XGOTREF(__pyx_t_12); if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 607, __pyx_L1_error) + __pyx_t_12 = PyObject_RichCompare(__pyx_t_10, __pyx_t_1, 
Py_LT); __Pyx_XGOTREF(__pyx_t_12); if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 735, __pyx_L1_error) __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_8 = __Pyx_PyObject_IsTrue(__pyx_t_12); if (unlikely(__pyx_t_8 < 0)) __PYX_ERR(0, 607, __pyx_L1_error) + __pyx_t_8 = __Pyx_PyObject_IsTrue(__pyx_t_12); if (unlikely(__pyx_t_8 < 0)) __PYX_ERR(0, 735, __pyx_L1_error) __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0; __pyx_t_9 = __pyx_t_8; __pyx_L19_bool_binop_done:; if (__pyx_t_9) { - /* "gensim/models/doc2vec_inner.pyx":608 + /* "gensim/models/doc2vec_inner.pyx":736 * continue # leaving i unchanged * if sample and predict_word.sample_int < random_int32(&next_random): * continue # <<<<<<<<<<<<<< @@ -8039,7 +8032,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con */ goto __pyx_L15_continue; - /* "gensim/models/doc2vec_inner.pyx":607 + /* "gensim/models/doc2vec_inner.pyx":735 * if predict_word is None: # shrink document to leave out word * continue # leaving i unchanged * if sample and predict_word.sample_int < random_int32(&next_random): # <<<<<<<<<<<<<< @@ -8048,20 +8041,20 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con */ } - /* "gensim/models/doc2vec_inner.pyx":609 + /* "gensim/models/doc2vec_inner.pyx":737 * if sample and predict_word.sample_int < random_int32(&next_random): * continue * indexes[i] = predict_word.index # <<<<<<<<<<<<<< * if hs: * codelens[i] = len(predict_word.code) */ - __pyx_t_12 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_index); if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 609, __pyx_L1_error) + __pyx_t_12 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_index); if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 737, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_12); - __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_t_12); if (unlikely((__pyx_t_14 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 609, __pyx_L1_error) + __pyx_t_14 = 
__Pyx_PyInt_As_npy_uint32(__pyx_t_12); if (unlikely((__pyx_t_14 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 737, __pyx_L1_error) __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0; (__pyx_v_indexes[__pyx_v_i]) = __pyx_t_14; - /* "gensim/models/doc2vec_inner.pyx":610 + /* "gensim/models/doc2vec_inner.pyx":738 * continue * indexes[i] = predict_word.index * if hs: # <<<<<<<<<<<<<< @@ -8071,46 +8064,46 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con __pyx_t_9 = (__pyx_v_hs != 0); if (__pyx_t_9) { - /* "gensim/models/doc2vec_inner.pyx":611 + /* "gensim/models/doc2vec_inner.pyx":739 * indexes[i] = predict_word.index * if hs: * codelens[i] = len(predict_word.code) # <<<<<<<<<<<<<< * codes[i] = np.PyArray_DATA(predict_word.code) * points[i] = np.PyArray_DATA(predict_word.point) */ - __pyx_t_12 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_code); if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 611, __pyx_L1_error) + __pyx_t_12 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_code); if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 739, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_12); - __pyx_t_5 = PyObject_Length(__pyx_t_12); if (unlikely(__pyx_t_5 == ((Py_ssize_t)-1))) __PYX_ERR(0, 611, __pyx_L1_error) + __pyx_t_5 = PyObject_Length(__pyx_t_12); if (unlikely(__pyx_t_5 == ((Py_ssize_t)-1))) __PYX_ERR(0, 739, __pyx_L1_error) __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0; (__pyx_v_codelens[__pyx_v_i]) = ((int)__pyx_t_5); - /* "gensim/models/doc2vec_inner.pyx":612 + /* "gensim/models/doc2vec_inner.pyx":740 * if hs: * codelens[i] = len(predict_word.code) * codes[i] = np.PyArray_DATA(predict_word.code) # <<<<<<<<<<<<<< * points[i] = np.PyArray_DATA(predict_word.point) * result += 1 */ - __pyx_t_12 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_code); if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 612, __pyx_L1_error) + __pyx_t_12 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_code); if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 740, 
__pyx_L1_error) __Pyx_GOTREF(__pyx_t_12); - if (!(likely(((__pyx_t_12) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_12, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 612, __pyx_L1_error) + if (!(likely(((__pyx_t_12) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_12, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 740, __pyx_L1_error) (__pyx_v_codes[__pyx_v_i]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_12))); __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0; - /* "gensim/models/doc2vec_inner.pyx":613 + /* "gensim/models/doc2vec_inner.pyx":741 * codelens[i] = len(predict_word.code) * codes[i] = np.PyArray_DATA(predict_word.code) * points[i] = np.PyArray_DATA(predict_word.point) # <<<<<<<<<<<<<< * result += 1 * i += 1 */ - __pyx_t_12 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_point); if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 613, __pyx_L1_error) + __pyx_t_12 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_point); if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 741, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_12); - if (!(likely(((__pyx_t_12) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_12, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 613, __pyx_L1_error) + if (!(likely(((__pyx_t_12) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_12, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 741, __pyx_L1_error) (__pyx_v_points[__pyx_v_i]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_12))); __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0; - /* "gensim/models/doc2vec_inner.pyx":610 + /* "gensim/models/doc2vec_inner.pyx":738 * continue * indexes[i] = predict_word.index * if hs: # <<<<<<<<<<<<<< @@ -8119,7 +8112,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con */ } - /* "gensim/models/doc2vec_inner.pyx":614 + /* "gensim/models/doc2vec_inner.pyx":742 * codes[i] = np.PyArray_DATA(predict_word.code) * points[i] = np.PyArray_DATA(predict_word.point) * result += 1 # <<<<<<<<<<<<<< @@ -8128,7 +8121,7 @@ static 
PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con */ __pyx_v_result = (__pyx_v_result + 1); - /* "gensim/models/doc2vec_inner.pyx":615 + /* "gensim/models/doc2vec_inner.pyx":743 * points[i] = np.PyArray_DATA(predict_word.point) * result += 1 * i += 1 # <<<<<<<<<<<<<< @@ -8137,7 +8130,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con */ __pyx_v_i = (__pyx_v_i + 1); - /* "gensim/models/doc2vec_inner.pyx":616 + /* "gensim/models/doc2vec_inner.pyx":744 * result += 1 * i += 1 * if i == MAX_DOCUMENT_LEN: # <<<<<<<<<<<<<< @@ -8147,7 +8140,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con __pyx_t_9 = ((__pyx_v_i == 0x2710) != 0); if (__pyx_t_9) { - /* "gensim/models/doc2vec_inner.pyx":617 + /* "gensim/models/doc2vec_inner.pyx":745 * i += 1 * if i == MAX_DOCUMENT_LEN: * break # TODO: log warning, tally overflow? # <<<<<<<<<<<<<< @@ -8156,7 +8149,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con */ goto __pyx_L16_break; - /* "gensim/models/doc2vec_inner.pyx":616 + /* "gensim/models/doc2vec_inner.pyx":744 * result += 1 * i += 1 * if i == MAX_DOCUMENT_LEN: # <<<<<<<<<<<<<< @@ -8165,7 +8158,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con */ } - /* "gensim/models/doc2vec_inner.pyx":603 + /* "gensim/models/doc2vec_inner.pyx":731 * vlookup = model.wv.vocab * i = 0 * for token in doc_words: # <<<<<<<<<<<<<< @@ -8177,7 +8170,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con __pyx_L16_break:; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/doc2vec_inner.pyx":618 + /* "gensim/models/doc2vec_inner.pyx":746 * if i == MAX_DOCUMENT_LEN: * break # TODO: log warning, tally overflow? 
* document_len = i # <<<<<<<<<<<<<< @@ -8186,7 +8179,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con */ __pyx_v_document_len = __pyx_v_i; - /* "gensim/models/doc2vec_inner.pyx":620 + /* "gensim/models/doc2vec_inner.pyx":748 * document_len = i * * for i in range(doctag_len): # <<<<<<<<<<<<<< @@ -8197,20 +8190,20 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con for (__pyx_t_15 = 0; __pyx_t_15 < __pyx_t_2; __pyx_t_15+=1) { __pyx_v_i = __pyx_t_15; - /* "gensim/models/doc2vec_inner.pyx":621 + /* "gensim/models/doc2vec_inner.pyx":749 * * for i in range(doctag_len): * _doctag_indexes[i] = doctag_indexes[i] # <<<<<<<<<<<<<< * result += 1 * */ - __pyx_t_3 = __Pyx_GetItemInt(__pyx_v_doctag_indexes, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 621, __pyx_L1_error) + __pyx_t_3 = __Pyx_GetItemInt(__pyx_v_doctag_indexes, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 749, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_t_3); if (unlikely((__pyx_t_14 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 621, __pyx_L1_error) + __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_t_3); if (unlikely((__pyx_t_14 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 749, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; (__pyx_v__doctag_indexes[__pyx_v_i]) = __pyx_t_14; - /* "gensim/models/doc2vec_inner.pyx":622 + /* "gensim/models/doc2vec_inner.pyx":750 * for i in range(doctag_len): * _doctag_indexes[i] = doctag_indexes[i] * result += 1 # <<<<<<<<<<<<<< @@ -8220,7 +8213,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con __pyx_v_result = (__pyx_v_result + 1); } - /* "gensim/models/doc2vec_inner.pyx":625 + /* "gensim/models/doc2vec_inner.pyx":753 * * # release GIL & train on the document * with nogil: # <<<<<<<<<<<<<< @@ -8235,7 +8228,7 @@ static 
PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con #endif /*try:*/ { - /* "gensim/models/doc2vec_inner.pyx":626 + /* "gensim/models/doc2vec_inner.pyx":754 * # release GIL & train on the document * with nogil: * for i in range(document_len): # <<<<<<<<<<<<<< @@ -8246,7 +8239,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con for (__pyx_t_15 = 0; __pyx_t_15 < __pyx_t_2; __pyx_t_15+=1) { __pyx_v_i = __pyx_t_15; - /* "gensim/models/doc2vec_inner.pyx":627 + /* "gensim/models/doc2vec_inner.pyx":755 * with nogil: * for i in range(document_len): * j = i - window # negative OK: will pad with null word # <<<<<<<<<<<<<< @@ -8255,7 +8248,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con */ __pyx_v_j = (__pyx_v_i - __pyx_v_window); - /* "gensim/models/doc2vec_inner.pyx":628 + /* "gensim/models/doc2vec_inner.pyx":756 * for i in range(document_len): * j = i - window # negative OK: will pad with null word * k = i + window + 1 # past document end OK: will pad with null word # <<<<<<<<<<<<<< @@ -8264,7 +8257,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con */ __pyx_v_k = ((__pyx_v_i + __pyx_v_window) + 1); - /* "gensim/models/doc2vec_inner.pyx":631 + /* "gensim/models/doc2vec_inner.pyx":759 * * # compose l1 & clear work * for m in range(doctag_len): # <<<<<<<<<<<<<< @@ -8275,7 +8268,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con for (__pyx_t_17 = 0; __pyx_t_17 < __pyx_t_16; __pyx_t_17+=1) { __pyx_v_m = __pyx_t_17; - /* "gensim/models/doc2vec_inner.pyx":633 + /* "gensim/models/doc2vec_inner.pyx":761 * for m in range(doctag_len): * # doc vector(s) * memcpy(&_neu1[m * vector_size], &_doctag_vectors[_doctag_indexes[m] * vector_size], # <<<<<<<<<<<<<< @@ -8285,7 +8278,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con memcpy((&(__pyx_v__neu1[(__pyx_v_m * __pyx_v_vector_size)])), 
(&(__pyx_v__doctag_vectors[((__pyx_v__doctag_indexes[__pyx_v_m]) * __pyx_v_vector_size)])), (__pyx_v_vector_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); } - /* "gensim/models/doc2vec_inner.pyx":635 + /* "gensim/models/doc2vec_inner.pyx":763 * memcpy(&_neu1[m * vector_size], &_doctag_vectors[_doctag_indexes[m] * vector_size], * vector_size * cython.sizeof(REAL_t)) * n = 0 # <<<<<<<<<<<<<< @@ -8294,7 +8287,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con */ __pyx_v_n = 0; - /* "gensim/models/doc2vec_inner.pyx":636 + /* "gensim/models/doc2vec_inner.pyx":764 * vector_size * cython.sizeof(REAL_t)) * n = 0 * for m in range(j, k): # <<<<<<<<<<<<<< @@ -8305,7 +8298,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con for (__pyx_t_17 = __pyx_v_j; __pyx_t_17 < __pyx_t_16; __pyx_t_17+=1) { __pyx_v_m = __pyx_t_17; - /* "gensim/models/doc2vec_inner.pyx":638 + /* "gensim/models/doc2vec_inner.pyx":766 * for m in range(j, k): * # word vectors in window * if m == i: # <<<<<<<<<<<<<< @@ -8315,7 +8308,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con __pyx_t_9 = ((__pyx_v_m == __pyx_v_i) != 0); if (__pyx_t_9) { - /* "gensim/models/doc2vec_inner.pyx":639 + /* "gensim/models/doc2vec_inner.pyx":767 * # word vectors in window * if m == i: * continue # <<<<<<<<<<<<<< @@ -8324,7 +8317,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con */ goto __pyx_L32_continue; - /* "gensim/models/doc2vec_inner.pyx":638 + /* "gensim/models/doc2vec_inner.pyx":766 * for m in range(j, k): * # word vectors in window * if m == i: # <<<<<<<<<<<<<< @@ -8333,7 +8326,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con */ } - /* "gensim/models/doc2vec_inner.pyx":640 + /* "gensim/models/doc2vec_inner.pyx":768 * if m == i: * continue * if m < 0 or m >= document_len: # <<<<<<<<<<<<<< @@ -8351,7 +8344,7 @@ static PyObject 
*__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con __pyx_L36_bool_binop_done:; if (__pyx_t_9) { - /* "gensim/models/doc2vec_inner.pyx":641 + /* "gensim/models/doc2vec_inner.pyx":769 * continue * if m < 0 or m >= document_len: * window_indexes[n] = null_word_index # <<<<<<<<<<<<<< @@ -8360,7 +8353,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con */ (__pyx_v_window_indexes[__pyx_v_n]) = __pyx_v_null_word_index; - /* "gensim/models/doc2vec_inner.pyx":640 + /* "gensim/models/doc2vec_inner.pyx":768 * if m == i: * continue * if m < 0 or m >= document_len: # <<<<<<<<<<<<<< @@ -8370,7 +8363,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con goto __pyx_L35; } - /* "gensim/models/doc2vec_inner.pyx":643 + /* "gensim/models/doc2vec_inner.pyx":771 * window_indexes[n] = null_word_index * else: * window_indexes[n] = indexes[m] # <<<<<<<<<<<<<< @@ -8382,7 +8375,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con } __pyx_L35:; - /* "gensim/models/doc2vec_inner.pyx":644 + /* "gensim/models/doc2vec_inner.pyx":772 * else: * window_indexes[n] = indexes[m] * n += 1 # <<<<<<<<<<<<<< @@ -8393,7 +8386,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con __pyx_L32_continue:; } - /* "gensim/models/doc2vec_inner.pyx":645 + /* "gensim/models/doc2vec_inner.pyx":773 * window_indexes[n] = indexes[m] * n += 1 * for m in range(2 * window): # <<<<<<<<<<<<<< @@ -8404,7 +8397,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con for (__pyx_t_16 = 0; __pyx_t_16 < __pyx_t_6; __pyx_t_16+=1) { __pyx_v_m = __pyx_t_16; - /* "gensim/models/doc2vec_inner.pyx":646 + /* "gensim/models/doc2vec_inner.pyx":774 * n += 1 * for m in range(2 * window): * memcpy(&_neu1[(doctag_len + m) * vector_size], &_word_vectors[window_indexes[m] * vector_size], # <<<<<<<<<<<<<< @@ -8414,7 +8407,7 @@ static PyObject 
*__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con memcpy((&(__pyx_v__neu1[((__pyx_v_doctag_len + __pyx_v_m) * __pyx_v_vector_size)])), (&(__pyx_v__word_vectors[((__pyx_v_window_indexes[__pyx_v_m]) * __pyx_v_vector_size)])), (__pyx_v_vector_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); } - /* "gensim/models/doc2vec_inner.pyx":648 + /* "gensim/models/doc2vec_inner.pyx":776 * memcpy(&_neu1[(doctag_len + m) * vector_size], &_word_vectors[window_indexes[m] * vector_size], * vector_size * cython.sizeof(REAL_t)) * memset(_work, 0, layer1_size * cython.sizeof(REAL_t)) # work to accumulate l1 error # <<<<<<<<<<<<<< @@ -8423,7 +8416,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con */ memset(__pyx_v__work, 0, (__pyx_v_layer1_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); - /* "gensim/models/doc2vec_inner.pyx":650 + /* "gensim/models/doc2vec_inner.pyx":778 * memset(_work, 0, layer1_size * cython.sizeof(REAL_t)) # work to accumulate l1 error * * if hs: # <<<<<<<<<<<<<< @@ -8433,7 +8426,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con __pyx_t_9 = (__pyx_v_hs != 0); if (__pyx_t_9) { - /* "gensim/models/doc2vec_inner.pyx":651 + /* "gensim/models/doc2vec_inner.pyx":779 * * if hs: * fast_document_dmc_hs(points[i], codes[i], codelens[i], # <<<<<<<<<<<<<< @@ -8442,7 +8435,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con */ __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dmc_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), (__pyx_v_codelens[__pyx_v_i]), __pyx_v__neu1, __pyx_v_syn1, __pyx_v__alpha, __pyx_v__work, __pyx_v_layer1_size, __pyx_v_vector_size, __pyx_v__learn_hidden); - /* "gensim/models/doc2vec_inner.pyx":650 + /* "gensim/models/doc2vec_inner.pyx":778 * memset(_work, 0, layer1_size * cython.sizeof(REAL_t)) # work to accumulate l1 error * * if hs: # <<<<<<<<<<<<<< @@ -8451,7 +8444,7 @@ static 
PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con */ } - /* "gensim/models/doc2vec_inner.pyx":654 + /* "gensim/models/doc2vec_inner.pyx":782 * _neu1, syn1, _alpha, _work, * layer1_size, vector_size, _learn_hidden) * if negative: # <<<<<<<<<<<<<< @@ -8461,7 +8454,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con __pyx_t_9 = (__pyx_v_negative != 0); if (__pyx_t_9) { - /* "gensim/models/doc2vec_inner.pyx":655 + /* "gensim/models/doc2vec_inner.pyx":783 * layer1_size, vector_size, _learn_hidden) * if negative: * next_random = fast_document_dmc_neg(negative, cum_table, cum_table_len, next_random, # <<<<<<<<<<<<<< @@ -8470,7 +8463,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con */ __pyx_v_next_random = __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dmc_neg(__pyx_v_negative, __pyx_v_cum_table, __pyx_v_cum_table_len, __pyx_v_next_random, __pyx_v__neu1, __pyx_v_syn1neg, (__pyx_v_indexes[__pyx_v_i]), __pyx_v__alpha, __pyx_v__work, __pyx_v_layer1_size, __pyx_v_vector_size, __pyx_v__learn_hidden); - /* "gensim/models/doc2vec_inner.pyx":654 + /* "gensim/models/doc2vec_inner.pyx":782 * _neu1, syn1, _alpha, _work, * layer1_size, vector_size, _learn_hidden) * if negative: # <<<<<<<<<<<<<< @@ -8479,7 +8472,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con */ } - /* "gensim/models/doc2vec_inner.pyx":659 + /* "gensim/models/doc2vec_inner.pyx":787 * layer1_size, vector_size, _learn_hidden) * * if _learn_doctags: # <<<<<<<<<<<<<< @@ -8489,7 +8482,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con __pyx_t_9 = (__pyx_v__learn_doctags != 0); if (__pyx_t_9) { - /* "gensim/models/doc2vec_inner.pyx":660 + /* "gensim/models/doc2vec_inner.pyx":788 * * if _learn_doctags: * for m in range(doctag_len): # <<<<<<<<<<<<<< @@ -8500,7 +8493,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con for 
(__pyx_t_17 = 0; __pyx_t_17 < __pyx_t_16; __pyx_t_17+=1) { __pyx_v_m = __pyx_t_17; - /* "gensim/models/doc2vec_inner.pyx":661 + /* "gensim/models/doc2vec_inner.pyx":789 * if _learn_doctags: * for m in range(doctag_len): * our_saxpy(&vector_size, &_doctag_locks[_doctag_indexes[m]], &_work[m * vector_size], # <<<<<<<<<<<<<< @@ -8510,7 +8503,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_vector_size), (&(__pyx_v__doctag_locks[(__pyx_v__doctag_indexes[__pyx_v_m])])), (&(__pyx_v__work[(__pyx_v_m * __pyx_v_vector_size)])), (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v__doctag_vectors[((__pyx_v__doctag_indexes[__pyx_v_m]) * __pyx_v_vector_size)])), (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE)); } - /* "gensim/models/doc2vec_inner.pyx":659 + /* "gensim/models/doc2vec_inner.pyx":787 * layer1_size, vector_size, _learn_hidden) * * if _learn_doctags: # <<<<<<<<<<<<<< @@ -8519,7 +8512,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con */ } - /* "gensim/models/doc2vec_inner.pyx":663 + /* "gensim/models/doc2vec_inner.pyx":791 * our_saxpy(&vector_size, &_doctag_locks[_doctag_indexes[m]], &_work[m * vector_size], * &ONE, &_doctag_vectors[_doctag_indexes[m] * vector_size], &ONE) * if _learn_words: # <<<<<<<<<<<<<< @@ -8529,7 +8522,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con __pyx_t_9 = (__pyx_v__learn_words != 0); if (__pyx_t_9) { - /* "gensim/models/doc2vec_inner.pyx":664 + /* "gensim/models/doc2vec_inner.pyx":792 * &ONE, &_doctag_vectors[_doctag_indexes[m] * vector_size], &ONE) * if _learn_words: * for m in range(2 * window): # <<<<<<<<<<<<<< @@ -8540,7 +8533,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con for (__pyx_t_16 = 0; __pyx_t_16 < __pyx_t_6; __pyx_t_16+=1) { __pyx_v_m = __pyx_t_16; - /* "gensim/models/doc2vec_inner.pyx":665 + /* 
"gensim/models/doc2vec_inner.pyx":793 * if _learn_words: * for m in range(2 * window): * our_saxpy(&vector_size, &_word_locks[window_indexes[m]], &_work[(doctag_len + m) * vector_size], # <<<<<<<<<<<<<< @@ -8550,7 +8543,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_vector_size), (&(__pyx_v__word_locks[(__pyx_v_window_indexes[__pyx_v_m])])), (&(__pyx_v__work[((__pyx_v_doctag_len + __pyx_v_m) * __pyx_v_vector_size)])), (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v__word_vectors[((__pyx_v_window_indexes[__pyx_v_m]) * __pyx_v_vector_size)])), (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE)); } - /* "gensim/models/doc2vec_inner.pyx":663 + /* "gensim/models/doc2vec_inner.pyx":791 * our_saxpy(&vector_size, &_doctag_locks[_doctag_indexes[m]], &_work[m * vector_size], * &ONE, &_doctag_vectors[_doctag_indexes[m] * vector_size], &ONE) * if _learn_words: # <<<<<<<<<<<<<< @@ -8561,7 +8554,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con } } - /* "gensim/models/doc2vec_inner.pyx":625 + /* "gensim/models/doc2vec_inner.pyx":753 * * # release GIL & train on the document * with nogil: # <<<<<<<<<<<<<< @@ -8580,19 +8573,19 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con } } - /* "gensim/models/doc2vec_inner.pyx":668 + /* "gensim/models/doc2vec_inner.pyx":796 * &ONE, &_word_vectors[window_indexes[m] * vector_size], &ONE) * * return result # <<<<<<<<<<<<<< */ __Pyx_XDECREF(__pyx_r); - __pyx_t_3 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 668, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 796, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_r = __pyx_t_3; __pyx_t_3 = 0; goto __pyx_L0; - /* "gensim/models/doc2vec_inner.pyx":521 + /* "gensim/models/doc2vec_inner.pyx":604 * * * def 
train_document_dm_concat(model, doc_words, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< @@ -8623,7 +8616,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con return __pyx_r; } -/* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":214 +/* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":214 * # experimental exception made for __getbuffer__ and __releasebuffer__ * # -- the details of this may change. * def __getbuffer__(ndarray self, Py_buffer* info, int flags): # <<<<<<<<<<<<<< @@ -8670,7 +8663,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __Pyx_GIVEREF(__pyx_v_info->obj); } - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":220 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":220 * # of flags * * if info == NULL: return # <<<<<<<<<<<<<< @@ -8683,7 +8676,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P goto __pyx_L0; } - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":223 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":223 * * cdef int copy_shape, i, ndim * cdef int endian_detector = 1 # <<<<<<<<<<<<<< @@ -8692,7 +8685,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_endian_detector = 1; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":224 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":224 * cdef int copy_shape, i, ndim * cdef int endian_detector = 1 * cdef bint little_endian = ((&endian_detector)[0] != 0) # <<<<<<<<<<<<<< @@ -8701,7 +8694,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_little_endian = ((((char *)(&__pyx_v_endian_detector))[0]) != 0); - /* 
"../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":226 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":226 * cdef bint little_endian = ((&endian_detector)[0] != 0) * * ndim = PyArray_NDIM(self) # <<<<<<<<<<<<<< @@ -8710,7 +8703,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_ndim = PyArray_NDIM(__pyx_v_self); - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":228 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":228 * ndim = PyArray_NDIM(self) * * if sizeof(npy_intp) != sizeof(Py_ssize_t): # <<<<<<<<<<<<<< @@ -8720,7 +8713,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_t_1 = (((sizeof(npy_intp)) != (sizeof(Py_ssize_t))) != 0); if (__pyx_t_1) { - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":229 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":229 * * if sizeof(npy_intp) != sizeof(Py_ssize_t): * copy_shape = 1 # <<<<<<<<<<<<<< @@ -8729,7 +8722,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_copy_shape = 1; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":228 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":228 * ndim = PyArray_NDIM(self) * * if sizeof(npy_intp) != sizeof(Py_ssize_t): # <<<<<<<<<<<<<< @@ -8739,7 +8732,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P goto __pyx_L4; } - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":231 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":231 * copy_shape = 1 * else: * copy_shape = 0 # <<<<<<<<<<<<<< @@ -8751,7 +8744,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject 
*__pyx_v_self, P } __pyx_L4:; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":233 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":233 * copy_shape = 0 * * if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS) # <<<<<<<<<<<<<< @@ -8765,7 +8758,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P goto __pyx_L6_bool_binop_done; } - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":234 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":234 * * if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS) * and not PyArray_CHKFLAGS(self, NPY_C_CONTIGUOUS)): # <<<<<<<<<<<<<< @@ -8776,7 +8769,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_t_1 = __pyx_t_2; __pyx_L6_bool_binop_done:; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":233 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":233 * copy_shape = 0 * * if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS) # <<<<<<<<<<<<<< @@ -8785,7 +8778,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ if (__pyx_t_1) { - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":235 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":235 * if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS) * and not PyArray_CHKFLAGS(self, NPY_C_CONTIGUOUS)): * raise ValueError(u"ndarray is not C contiguous") # <<<<<<<<<<<<<< @@ -8798,7 +8791,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __PYX_ERR(1, 235, __pyx_L1_error) - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":233 + /* 
"../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":233 * copy_shape = 0 * * if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS) # <<<<<<<<<<<<<< @@ -8807,7 +8800,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ } - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":237 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":237 * raise ValueError(u"ndarray is not C contiguous") * * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) # <<<<<<<<<<<<<< @@ -8821,7 +8814,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P goto __pyx_L9_bool_binop_done; } - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":238 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":238 * * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) * and not PyArray_CHKFLAGS(self, NPY_F_CONTIGUOUS)): # <<<<<<<<<<<<<< @@ -8832,7 +8825,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_t_1 = __pyx_t_2; __pyx_L9_bool_binop_done:; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":237 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":237 * raise ValueError(u"ndarray is not C contiguous") * * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) # <<<<<<<<<<<<<< @@ -8841,7 +8834,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ if (__pyx_t_1) { - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":239 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":239 * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) * and not PyArray_CHKFLAGS(self, NPY_F_CONTIGUOUS)): * raise 
ValueError(u"ndarray is not Fortran contiguous") # <<<<<<<<<<<<<< @@ -8854,7 +8847,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __PYX_ERR(1, 239, __pyx_L1_error) - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":237 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":237 * raise ValueError(u"ndarray is not C contiguous") * * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) # <<<<<<<<<<<<<< @@ -8863,7 +8856,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ } - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":241 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":241 * raise ValueError(u"ndarray is not Fortran contiguous") * * info.buf = PyArray_DATA(self) # <<<<<<<<<<<<<< @@ -8872,7 +8865,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_info->buf = PyArray_DATA(__pyx_v_self); - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":242 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":242 * * info.buf = PyArray_DATA(self) * info.ndim = ndim # <<<<<<<<<<<<<< @@ -8881,7 +8874,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_info->ndim = __pyx_v_ndim; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":243 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":243 * info.buf = PyArray_DATA(self) * info.ndim = ndim * if copy_shape: # <<<<<<<<<<<<<< @@ -8891,7 +8884,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_t_1 = (__pyx_v_copy_shape != 0); if (__pyx_t_1) { - /* 
"../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":246 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":246 * # Allocate new buffer for strides and shape info. * # This is allocated as one block, strides first. * info.strides = PyObject_Malloc(sizeof(Py_ssize_t) * 2 * ndim) # <<<<<<<<<<<<<< @@ -8900,7 +8893,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_info->strides = ((Py_ssize_t *)PyObject_Malloc((((sizeof(Py_ssize_t)) * 2) * ((size_t)__pyx_v_ndim)))); - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":247 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":247 * # This is allocated as one block, strides first. * info.strides = PyObject_Malloc(sizeof(Py_ssize_t) * 2 * ndim) * info.shape = info.strides + ndim # <<<<<<<<<<<<<< @@ -8909,7 +8902,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_info->shape = (__pyx_v_info->strides + __pyx_v_ndim); - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":248 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":248 * info.strides = PyObject_Malloc(sizeof(Py_ssize_t) * 2 * ndim) * info.shape = info.strides + ndim * for i in range(ndim): # <<<<<<<<<<<<<< @@ -8920,7 +8913,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_4; __pyx_t_5+=1) { __pyx_v_i = __pyx_t_5; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":249 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":249 * info.shape = info.strides + ndim * for i in range(ndim): * info.strides[i] = PyArray_STRIDES(self)[i] # <<<<<<<<<<<<<< @@ -8929,7 +8922,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, 
P */ (__pyx_v_info->strides[__pyx_v_i]) = (PyArray_STRIDES(__pyx_v_self)[__pyx_v_i]); - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":250 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":250 * for i in range(ndim): * info.strides[i] = PyArray_STRIDES(self)[i] * info.shape[i] = PyArray_DIMS(self)[i] # <<<<<<<<<<<<<< @@ -8939,7 +8932,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P (__pyx_v_info->shape[__pyx_v_i]) = (PyArray_DIMS(__pyx_v_self)[__pyx_v_i]); } - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":243 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":243 * info.buf = PyArray_DATA(self) * info.ndim = ndim * if copy_shape: # <<<<<<<<<<<<<< @@ -8949,7 +8942,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P goto __pyx_L11; } - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":252 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":252 * info.shape[i] = PyArray_DIMS(self)[i] * else: * info.strides = PyArray_STRIDES(self) # <<<<<<<<<<<<<< @@ -8959,7 +8952,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P /*else*/ { __pyx_v_info->strides = ((Py_ssize_t *)PyArray_STRIDES(__pyx_v_self)); - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":253 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":253 * else: * info.strides = PyArray_STRIDES(self) * info.shape = PyArray_DIMS(self) # <<<<<<<<<<<<<< @@ -8970,7 +8963,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P } __pyx_L11:; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":254 + /* 
"../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":254 * info.strides = PyArray_STRIDES(self) * info.shape = PyArray_DIMS(self) * info.suboffsets = NULL # <<<<<<<<<<<<<< @@ -8979,7 +8972,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_info->suboffsets = NULL; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":255 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":255 * info.shape = PyArray_DIMS(self) * info.suboffsets = NULL * info.itemsize = PyArray_ITEMSIZE(self) # <<<<<<<<<<<<<< @@ -8988,7 +8981,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_info->itemsize = PyArray_ITEMSIZE(__pyx_v_self); - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":256 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":256 * info.suboffsets = NULL * info.itemsize = PyArray_ITEMSIZE(self) * info.readonly = not PyArray_ISWRITEABLE(self) # <<<<<<<<<<<<<< @@ -8997,7 +8990,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_info->readonly = (!(PyArray_ISWRITEABLE(__pyx_v_self) != 0)); - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":259 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":259 * * cdef int t * cdef char* f = NULL # <<<<<<<<<<<<<< @@ -9006,7 +8999,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_f = NULL; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":260 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":260 * cdef int t * cdef char* f = NULL * cdef dtype descr = self.descr # <<<<<<<<<<<<<< @@ -9018,7 +9011,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject 
*__pyx_v_self, P __pyx_v_descr = ((PyArray_Descr *)__pyx_t_3); __pyx_t_3 = 0; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":263 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":263 * cdef int offset * * cdef bint hasfields = PyDataType_HASFIELDS(descr) # <<<<<<<<<<<<<< @@ -9027,7 +9020,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_hasfields = PyDataType_HASFIELDS(__pyx_v_descr); - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":265 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":265 * cdef bint hasfields = PyDataType_HASFIELDS(descr) * * if not hasfields and not copy_shape: # <<<<<<<<<<<<<< @@ -9045,7 +9038,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_L15_bool_binop_done:; if (__pyx_t_1) { - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":267 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":267 * if not hasfields and not copy_shape: * # do not call releasebuffer * info.obj = None # <<<<<<<<<<<<<< @@ -9058,7 +9051,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __Pyx_DECREF(__pyx_v_info->obj); __pyx_v_info->obj = Py_None; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":265 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":265 * cdef bint hasfields = PyDataType_HASFIELDS(descr) * * if not hasfields and not copy_shape: # <<<<<<<<<<<<<< @@ -9068,7 +9061,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P goto __pyx_L14; } - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":270 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":270 * else: * # 
need to call releasebuffer * info.obj = self # <<<<<<<<<<<<<< @@ -9084,7 +9077,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P } __pyx_L14:; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":272 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":272 * info.obj = self * * if not hasfields: # <<<<<<<<<<<<<< @@ -9094,7 +9087,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_t_1 = ((!(__pyx_v_hasfields != 0)) != 0); if (__pyx_t_1) { - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":273 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":273 * * if not hasfields: * t = descr.type_num # <<<<<<<<<<<<<< @@ -9104,7 +9097,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_t_4 = __pyx_v_descr->type_num; __pyx_v_t = __pyx_t_4; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":274 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":274 * if not hasfields: * t = descr.type_num * if ((descr.byteorder == c'>' and little_endian) or # <<<<<<<<<<<<<< @@ -9124,7 +9117,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P } __pyx_L20_next_or:; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":275 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":275 * t = descr.type_num * if ((descr.byteorder == c'>' and little_endian) or * (descr.byteorder == c'<' and not little_endian)): # <<<<<<<<<<<<<< @@ -9141,7 +9134,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_t_1 = __pyx_t_2; __pyx_L19_bool_binop_done:; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":274 + /* 
"../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":274 * if not hasfields: * t = descr.type_num * if ((descr.byteorder == c'>' and little_endian) or # <<<<<<<<<<<<<< @@ -9150,7 +9143,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ if (__pyx_t_1) { - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":276 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":276 * if ((descr.byteorder == c'>' and little_endian) or * (descr.byteorder == c'<' and not little_endian)): * raise ValueError(u"Non-native byte order not supported") # <<<<<<<<<<<<<< @@ -9163,7 +9156,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __PYX_ERR(1, 276, __pyx_L1_error) - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":274 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":274 * if not hasfields: * t = descr.type_num * if ((descr.byteorder == c'>' and little_endian) or # <<<<<<<<<<<<<< @@ -9172,7 +9165,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ } - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":277 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":277 * (descr.byteorder == c'<' and not little_endian)): * raise ValueError(u"Non-native byte order not supported") * if t == NPY_BYTE: f = "b" # <<<<<<<<<<<<<< @@ -9184,7 +9177,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = ((char *)"b"); break; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":278 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":278 * raise ValueError(u"Non-native byte order not supported") * if t == NPY_BYTE: f = "b" 
* elif t == NPY_UBYTE: f = "B" # <<<<<<<<<<<<<< @@ -9195,7 +9188,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = ((char *)"B"); break; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":279 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":279 * if t == NPY_BYTE: f = "b" * elif t == NPY_UBYTE: f = "B" * elif t == NPY_SHORT: f = "h" # <<<<<<<<<<<<<< @@ -9206,7 +9199,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = ((char *)"h"); break; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":280 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":280 * elif t == NPY_UBYTE: f = "B" * elif t == NPY_SHORT: f = "h" * elif t == NPY_USHORT: f = "H" # <<<<<<<<<<<<<< @@ -9217,7 +9210,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = ((char *)"H"); break; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":281 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":281 * elif t == NPY_SHORT: f = "h" * elif t == NPY_USHORT: f = "H" * elif t == NPY_INT: f = "i" # <<<<<<<<<<<<<< @@ -9228,7 +9221,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = ((char *)"i"); break; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":282 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":282 * elif t == NPY_USHORT: f = "H" * elif t == NPY_INT: f = "i" * elif t == NPY_UINT: f = "I" # <<<<<<<<<<<<<< @@ -9239,7 +9232,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = ((char *)"I"); break; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":283 + /* 
"../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":283 * elif t == NPY_INT: f = "i" * elif t == NPY_UINT: f = "I" * elif t == NPY_LONG: f = "l" # <<<<<<<<<<<<<< @@ -9250,7 +9243,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = ((char *)"l"); break; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":284 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":284 * elif t == NPY_UINT: f = "I" * elif t == NPY_LONG: f = "l" * elif t == NPY_ULONG: f = "L" # <<<<<<<<<<<<<< @@ -9261,7 +9254,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = ((char *)"L"); break; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":285 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":285 * elif t == NPY_LONG: f = "l" * elif t == NPY_ULONG: f = "L" * elif t == NPY_LONGLONG: f = "q" # <<<<<<<<<<<<<< @@ -9272,7 +9265,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = ((char *)"q"); break; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":286 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":286 * elif t == NPY_ULONG: f = "L" * elif t == NPY_LONGLONG: f = "q" * elif t == NPY_ULONGLONG: f = "Q" # <<<<<<<<<<<<<< @@ -9283,7 +9276,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = ((char *)"Q"); break; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":287 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":287 * elif t == NPY_LONGLONG: f = "q" * elif t == NPY_ULONGLONG: f = "Q" * elif t == NPY_FLOAT: f = "f" # <<<<<<<<<<<<<< @@ -9294,7 +9287,7 @@ static int 
__pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = ((char *)"f"); break; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":288 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":288 * elif t == NPY_ULONGLONG: f = "Q" * elif t == NPY_FLOAT: f = "f" * elif t == NPY_DOUBLE: f = "d" # <<<<<<<<<<<<<< @@ -9305,7 +9298,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = ((char *)"d"); break; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":289 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":289 * elif t == NPY_FLOAT: f = "f" * elif t == NPY_DOUBLE: f = "d" * elif t == NPY_LONGDOUBLE: f = "g" # <<<<<<<<<<<<<< @@ -9316,7 +9309,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = ((char *)"g"); break; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":290 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":290 * elif t == NPY_DOUBLE: f = "d" * elif t == NPY_LONGDOUBLE: f = "g" * elif t == NPY_CFLOAT: f = "Zf" # <<<<<<<<<<<<<< @@ -9327,7 +9320,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = ((char *)"Zf"); break; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":291 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":291 * elif t == NPY_LONGDOUBLE: f = "g" * elif t == NPY_CFLOAT: f = "Zf" * elif t == NPY_CDOUBLE: f = "Zd" # <<<<<<<<<<<<<< @@ -9338,7 +9331,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = ((char *)"Zd"); break; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":292 + /* 
"../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":292 * elif t == NPY_CFLOAT: f = "Zf" * elif t == NPY_CDOUBLE: f = "Zd" * elif t == NPY_CLONGDOUBLE: f = "Zg" # <<<<<<<<<<<<<< @@ -9349,7 +9342,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = ((char *)"Zg"); break; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":293 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":293 * elif t == NPY_CDOUBLE: f = "Zd" * elif t == NPY_CLONGDOUBLE: f = "Zg" * elif t == NPY_OBJECT: f = "O" # <<<<<<<<<<<<<< @@ -9361,7 +9354,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P break; default: - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":295 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":295 * elif t == NPY_OBJECT: f = "O" * else: * raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t) # <<<<<<<<<<<<<< @@ -9387,7 +9380,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P break; } - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":296 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":296 * else: * raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t) * info.format = f # <<<<<<<<<<<<<< @@ -9396,7 +9389,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_info->format = __pyx_v_f; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":297 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":297 * raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t) * info.format = f * return # <<<<<<<<<<<<<< @@ -9406,7 +9399,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P 
__pyx_r = 0; goto __pyx_L0; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":272 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":272 * info.obj = self * * if not hasfields: # <<<<<<<<<<<<<< @@ -9415,7 +9408,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ } - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":299 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":299 * return * else: * info.format = PyObject_Malloc(_buffer_format_string_len) # <<<<<<<<<<<<<< @@ -9425,7 +9418,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P /*else*/ { __pyx_v_info->format = ((char *)PyObject_Malloc(0xFF)); - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":300 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":300 * else: * info.format = PyObject_Malloc(_buffer_format_string_len) * info.format[0] = c'^' # Native data types, manual alignment # <<<<<<<<<<<<<< @@ -9434,7 +9427,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ (__pyx_v_info->format[0]) = '^'; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":301 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":301 * info.format = PyObject_Malloc(_buffer_format_string_len) * info.format[0] = c'^' # Native data types, manual alignment * offset = 0 # <<<<<<<<<<<<<< @@ -9443,7 +9436,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_offset = 0; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":302 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":302 * info.format[0] = c'^' # Native data types, manual alignment * offset = 0 * f = 
_util_dtypestring(descr, info.format + 1, # <<<<<<<<<<<<<< @@ -9453,7 +9446,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_t_7 = __pyx_f_5numpy__util_dtypestring(__pyx_v_descr, (__pyx_v_info->format + 1), (__pyx_v_info->format + 0xFF), (&__pyx_v_offset)); if (unlikely(__pyx_t_7 == ((char *)NULL))) __PYX_ERR(1, 302, __pyx_L1_error) __pyx_v_f = __pyx_t_7; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":305 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":305 * info.format + _buffer_format_string_len, * &offset) * f[0] = c'\0' # Terminate format string # <<<<<<<<<<<<<< @@ -9463,7 +9456,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P (__pyx_v_f[0]) = '\x00'; } - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":214 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":214 * # experimental exception made for __getbuffer__ and __releasebuffer__ * # -- the details of this may change. 
* def __getbuffer__(ndarray self, Py_buffer* info, int flags): # <<<<<<<<<<<<<< @@ -9495,7 +9488,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P return __pyx_r; } -/* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":307 +/* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":307 * f[0] = c'\0' # Terminate format string * * def __releasebuffer__(ndarray self, Py_buffer* info): # <<<<<<<<<<<<<< @@ -9519,7 +9512,7 @@ static void __pyx_pf_5numpy_7ndarray_2__releasebuffer__(PyArrayObject *__pyx_v_s int __pyx_t_1; __Pyx_RefNannySetupContext("__releasebuffer__", 0); - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":308 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":308 * * def __releasebuffer__(ndarray self, Py_buffer* info): * if PyArray_HASFIELDS(self): # <<<<<<<<<<<<<< @@ -9529,7 +9522,7 @@ static void __pyx_pf_5numpy_7ndarray_2__releasebuffer__(PyArrayObject *__pyx_v_s __pyx_t_1 = (PyArray_HASFIELDS(__pyx_v_self) != 0); if (__pyx_t_1) { - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":309 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":309 * def __releasebuffer__(ndarray self, Py_buffer* info): * if PyArray_HASFIELDS(self): * PyObject_Free(info.format) # <<<<<<<<<<<<<< @@ -9538,7 +9531,7 @@ static void __pyx_pf_5numpy_7ndarray_2__releasebuffer__(PyArrayObject *__pyx_v_s */ PyObject_Free(__pyx_v_info->format); - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":308 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":308 * * def __releasebuffer__(ndarray self, Py_buffer* info): * if PyArray_HASFIELDS(self): # <<<<<<<<<<<<<< @@ -9547,7 +9540,7 @@ static void __pyx_pf_5numpy_7ndarray_2__releasebuffer__(PyArrayObject *__pyx_v_s */ } - /* 
"../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":310 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":310 * if PyArray_HASFIELDS(self): * PyObject_Free(info.format) * if sizeof(npy_intp) != sizeof(Py_ssize_t): # <<<<<<<<<<<<<< @@ -9557,7 +9550,7 @@ static void __pyx_pf_5numpy_7ndarray_2__releasebuffer__(PyArrayObject *__pyx_v_s __pyx_t_1 = (((sizeof(npy_intp)) != (sizeof(Py_ssize_t))) != 0); if (__pyx_t_1) { - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":311 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":311 * PyObject_Free(info.format) * if sizeof(npy_intp) != sizeof(Py_ssize_t): * PyObject_Free(info.strides) # <<<<<<<<<<<<<< @@ -9566,7 +9559,7 @@ static void __pyx_pf_5numpy_7ndarray_2__releasebuffer__(PyArrayObject *__pyx_v_s */ PyObject_Free(__pyx_v_info->strides); - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":310 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":310 * if PyArray_HASFIELDS(self): * PyObject_Free(info.format) * if sizeof(npy_intp) != sizeof(Py_ssize_t): # <<<<<<<<<<<<<< @@ -9575,7 +9568,7 @@ static void __pyx_pf_5numpy_7ndarray_2__releasebuffer__(PyArrayObject *__pyx_v_s */ } - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":307 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":307 * f[0] = c'\0' # Terminate format string * * def __releasebuffer__(ndarray self, Py_buffer* info): # <<<<<<<<<<<<<< @@ -9587,7 +9580,7 @@ static void __pyx_pf_5numpy_7ndarray_2__releasebuffer__(PyArrayObject *__pyx_v_s __Pyx_RefNannyFinishContext(); } -/* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":788 +/* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":788 * ctypedef npy_cdouble complex_t * * cdef inline 
object PyArray_MultiIterNew1(a): # <<<<<<<<<<<<<< @@ -9601,7 +9594,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew1(PyObject *__ PyObject *__pyx_t_1 = NULL; __Pyx_RefNannySetupContext("PyArray_MultiIterNew1", 0); - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":789 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":789 * * cdef inline object PyArray_MultiIterNew1(a): * return PyArray_MultiIterNew(1, a) # <<<<<<<<<<<<<< @@ -9615,7 +9608,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew1(PyObject *__ __pyx_t_1 = 0; goto __pyx_L0; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":788 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":788 * ctypedef npy_cdouble complex_t * * cdef inline object PyArray_MultiIterNew1(a): # <<<<<<<<<<<<<< @@ -9634,7 +9627,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew1(PyObject *__ return __pyx_r; } -/* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":791 +/* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":791 * return PyArray_MultiIterNew(1, a) * * cdef inline object PyArray_MultiIterNew2(a, b): # <<<<<<<<<<<<<< @@ -9648,7 +9641,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew2(PyObject *__ PyObject *__pyx_t_1 = NULL; __Pyx_RefNannySetupContext("PyArray_MultiIterNew2", 0); - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":792 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":792 * * cdef inline object PyArray_MultiIterNew2(a, b): * return PyArray_MultiIterNew(2, a, b) # <<<<<<<<<<<<<< @@ -9662,7 +9655,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew2(PyObject *__ __pyx_t_1 = 0; goto __pyx_L0; - /* 
"../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":791 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":791 * return PyArray_MultiIterNew(1, a) * * cdef inline object PyArray_MultiIterNew2(a, b): # <<<<<<<<<<<<<< @@ -9681,7 +9674,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew2(PyObject *__ return __pyx_r; } -/* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":794 +/* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":794 * return PyArray_MultiIterNew(2, a, b) * * cdef inline object PyArray_MultiIterNew3(a, b, c): # <<<<<<<<<<<<<< @@ -9695,7 +9688,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew3(PyObject *__ PyObject *__pyx_t_1 = NULL; __Pyx_RefNannySetupContext("PyArray_MultiIterNew3", 0); - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":795 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":795 * * cdef inline object PyArray_MultiIterNew3(a, b, c): * return PyArray_MultiIterNew(3, a, b, c) # <<<<<<<<<<<<<< @@ -9709,7 +9702,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew3(PyObject *__ __pyx_t_1 = 0; goto __pyx_L0; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":794 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":794 * return PyArray_MultiIterNew(2, a, b) * * cdef inline object PyArray_MultiIterNew3(a, b, c): # <<<<<<<<<<<<<< @@ -9728,7 +9721,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew3(PyObject *__ return __pyx_r; } -/* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":797 +/* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":797 * return PyArray_MultiIterNew(3, a, b, c) * * cdef inline object PyArray_MultiIterNew4(a, b, c, d): # 
<<<<<<<<<<<<<< @@ -9742,7 +9735,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew4(PyObject *__ PyObject *__pyx_t_1 = NULL; __Pyx_RefNannySetupContext("PyArray_MultiIterNew4", 0); - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":798 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":798 * * cdef inline object PyArray_MultiIterNew4(a, b, c, d): * return PyArray_MultiIterNew(4, a, b, c, d) # <<<<<<<<<<<<<< @@ -9756,7 +9749,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew4(PyObject *__ __pyx_t_1 = 0; goto __pyx_L0; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":797 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":797 * return PyArray_MultiIterNew(3, a, b, c) * * cdef inline object PyArray_MultiIterNew4(a, b, c, d): # <<<<<<<<<<<<<< @@ -9775,7 +9768,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew4(PyObject *__ return __pyx_r; } -/* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":800 +/* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":800 * return PyArray_MultiIterNew(4, a, b, c, d) * * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): # <<<<<<<<<<<<<< @@ -9789,7 +9782,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew5(PyObject *__ PyObject *__pyx_t_1 = NULL; __Pyx_RefNannySetupContext("PyArray_MultiIterNew5", 0); - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":801 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":801 * * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): * return PyArray_MultiIterNew(5, a, b, c, d, e) # <<<<<<<<<<<<<< @@ -9803,7 +9796,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew5(PyObject *__ __pyx_t_1 = 0; goto __pyx_L0; - /* 
"../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":800 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":800 * return PyArray_MultiIterNew(4, a, b, c, d) * * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): # <<<<<<<<<<<<<< @@ -9822,7 +9815,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew5(PyObject *__ return __pyx_r; } -/* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":803 +/* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":803 * return PyArray_MultiIterNew(5, a, b, c, d, e) * * cdef inline tuple PyDataType_SHAPE(dtype d): # <<<<<<<<<<<<<< @@ -9836,7 +9829,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyDataType_SHAPE(PyArray_Descr *__ int __pyx_t_1; __Pyx_RefNannySetupContext("PyDataType_SHAPE", 0); - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":804 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":804 * * cdef inline tuple PyDataType_SHAPE(dtype d): * if PyDataType_HASSUBARRAY(d): # <<<<<<<<<<<<<< @@ -9846,7 +9839,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyDataType_SHAPE(PyArray_Descr *__ __pyx_t_1 = (PyDataType_HASSUBARRAY(__pyx_v_d) != 0); if (__pyx_t_1) { - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":805 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":805 * cdef inline tuple PyDataType_SHAPE(dtype d): * if PyDataType_HASSUBARRAY(d): * return d.subarray.shape # <<<<<<<<<<<<<< @@ -9858,7 +9851,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyDataType_SHAPE(PyArray_Descr *__ __pyx_r = ((PyObject*)__pyx_v_d->subarray->shape); goto __pyx_L0; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":804 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":804 * * cdef inline 
tuple PyDataType_SHAPE(dtype d): * if PyDataType_HASSUBARRAY(d): # <<<<<<<<<<<<<< @@ -9867,7 +9860,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyDataType_SHAPE(PyArray_Descr *__ */ } - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":807 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":807 * return d.subarray.shape * else: * return () # <<<<<<<<<<<<<< @@ -9881,7 +9874,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyDataType_SHAPE(PyArray_Descr *__ goto __pyx_L0; } - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":803 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":803 * return PyArray_MultiIterNew(5, a, b, c, d, e) * * cdef inline tuple PyDataType_SHAPE(dtype d): # <<<<<<<<<<<<<< @@ -9896,7 +9889,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyDataType_SHAPE(PyArray_Descr *__ return __pyx_r; } -/* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":809 +/* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":809 * return () * * cdef inline char* _util_dtypestring(dtype descr, char* f, char* end, int* offset) except NULL: # <<<<<<<<<<<<<< @@ -9925,7 +9918,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx char *__pyx_t_9; __Pyx_RefNannySetupContext("_util_dtypestring", 0); - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":814 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":814 * * cdef dtype child * cdef int endian_detector = 1 # <<<<<<<<<<<<<< @@ -9934,7 +9927,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx */ __pyx_v_endian_detector = 1; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":815 + /* 
"../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":815 * cdef dtype child * cdef int endian_detector = 1 * cdef bint little_endian = ((&endian_detector)[0] != 0) # <<<<<<<<<<<<<< @@ -9943,7 +9936,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx */ __pyx_v_little_endian = ((((char *)(&__pyx_v_endian_detector))[0]) != 0); - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":818 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":818 * cdef tuple fields * * for childname in descr.names: # <<<<<<<<<<<<<< @@ -9966,7 +9959,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx __Pyx_XDECREF_SET(__pyx_v_childname, __pyx_t_3); __pyx_t_3 = 0; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":819 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":819 * * for childname in descr.names: * fields = descr.fields[childname] # <<<<<<<<<<<<<< @@ -9983,7 +9976,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx __Pyx_XDECREF_SET(__pyx_v_fields, ((PyObject*)__pyx_t_3)); __pyx_t_3 = 0; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":820 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":820 * for childname in descr.names: * fields = descr.fields[childname] * child, new_offset = fields # <<<<<<<<<<<<<< @@ -10022,7 +10015,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx __Pyx_XDECREF_SET(__pyx_v_new_offset, __pyx_t_4); __pyx_t_4 = 0; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":822 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":822 * child, new_offset = fields * * if (end - f) - (new_offset - offset[0]) < 15: # <<<<<<<<<<<<<< @@ 
-10039,7 +10032,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx __pyx_t_6 = ((((__pyx_v_end - __pyx_v_f) - ((int)__pyx_t_5)) < 15) != 0); if (__pyx_t_6) { - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":823 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":823 * * if (end - f) - (new_offset - offset[0]) < 15: * raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd") # <<<<<<<<<<<<<< @@ -10052,7 +10045,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __PYX_ERR(1, 823, __pyx_L1_error) - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":822 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":822 * child, new_offset = fields * * if (end - f) - (new_offset - offset[0]) < 15: # <<<<<<<<<<<<<< @@ -10061,7 +10054,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx */ } - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":825 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":825 * raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd") * * if ((child.byteorder == c'>' and little_endian) or # <<<<<<<<<<<<<< @@ -10081,7 +10074,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx } __pyx_L8_next_or:; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":826 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":826 * * if ((child.byteorder == c'>' and little_endian) or * (child.byteorder == c'<' and not little_endian)): # <<<<<<<<<<<<<< @@ -10098,7 +10091,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx __pyx_t_6 = __pyx_t_7; 
__pyx_L7_bool_binop_done:; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":825 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":825 * raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd") * * if ((child.byteorder == c'>' and little_endian) or # <<<<<<<<<<<<<< @@ -10107,7 +10100,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx */ if (__pyx_t_6) { - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":827 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":827 * if ((child.byteorder == c'>' and little_endian) or * (child.byteorder == c'<' and not little_endian)): * raise ValueError(u"Non-native byte order not supported") # <<<<<<<<<<<<<< @@ -10120,7 +10113,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __PYX_ERR(1, 827, __pyx_L1_error) - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":825 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":825 * raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd") * * if ((child.byteorder == c'>' and little_endian) or # <<<<<<<<<<<<<< @@ -10129,7 +10122,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx */ } - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":837 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":837 * * # Output padding bytes * while offset[0] < new_offset: # <<<<<<<<<<<<<< @@ -10145,7 +10138,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; if (!__pyx_t_6) break; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":838 + /* 
"../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":838 * # Output padding bytes * while offset[0] < new_offset: * f[0] = 120 # "x"; pad byte # <<<<<<<<<<<<<< @@ -10154,7 +10147,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx */ (__pyx_v_f[0]) = 0x78; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":839 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":839 * while offset[0] < new_offset: * f[0] = 120 # "x"; pad byte * f += 1 # <<<<<<<<<<<<<< @@ -10163,7 +10156,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx */ __pyx_v_f = (__pyx_v_f + 1); - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":840 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":840 * f[0] = 120 # "x"; pad byte * f += 1 * offset[0] += 1 # <<<<<<<<<<<<<< @@ -10174,7 +10167,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx (__pyx_v_offset[__pyx_t_8]) = ((__pyx_v_offset[__pyx_t_8]) + 1); } - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":842 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":842 * offset[0] += 1 * * offset[0] += child.itemsize # <<<<<<<<<<<<<< @@ -10184,7 +10177,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx __pyx_t_8 = 0; (__pyx_v_offset[__pyx_t_8]) = ((__pyx_v_offset[__pyx_t_8]) + __pyx_v_child->elsize); - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":844 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":844 * offset[0] += child.itemsize * * if not PyDataType_HASFIELDS(child): # <<<<<<<<<<<<<< @@ -10194,7 +10187,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx __pyx_t_6 = 
((!(PyDataType_HASFIELDS(__pyx_v_child) != 0)) != 0); if (__pyx_t_6) { - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":845 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":845 * * if not PyDataType_HASFIELDS(child): * t = child.type_num # <<<<<<<<<<<<<< @@ -10206,7 +10199,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx __Pyx_XDECREF_SET(__pyx_v_t, __pyx_t_4); __pyx_t_4 = 0; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":846 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":846 * if not PyDataType_HASFIELDS(child): * t = child.type_num * if end - f < 5: # <<<<<<<<<<<<<< @@ -10216,7 +10209,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx __pyx_t_6 = (((__pyx_v_end - __pyx_v_f) < 5) != 0); if (__pyx_t_6) { - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":847 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":847 * t = child.type_num * if end - f < 5: * raise RuntimeError(u"Format string allocated too short.") # <<<<<<<<<<<<<< @@ -10229,7 +10222,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __PYX_ERR(1, 847, __pyx_L1_error) - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":846 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":846 * if not PyDataType_HASFIELDS(child): * t = child.type_num * if end - f < 5: # <<<<<<<<<<<<<< @@ -10238,7 +10231,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx */ } - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":850 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":850 * * # Until ticket #99 
is fixed, use integers to avoid warnings * if t == NPY_BYTE: f[0] = 98 #"b" # <<<<<<<<<<<<<< @@ -10256,7 +10249,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":851 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":851 * # Until ticket #99 is fixed, use integers to avoid warnings * if t == NPY_BYTE: f[0] = 98 #"b" * elif t == NPY_UBYTE: f[0] = 66 #"B" # <<<<<<<<<<<<<< @@ -10274,7 +10267,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":852 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":852 * if t == NPY_BYTE: f[0] = 98 #"b" * elif t == NPY_UBYTE: f[0] = 66 #"B" * elif t == NPY_SHORT: f[0] = 104 #"h" # <<<<<<<<<<<<<< @@ -10292,7 +10285,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":853 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":853 * elif t == NPY_UBYTE: f[0] = 66 #"B" * elif t == NPY_SHORT: f[0] = 104 #"h" * elif t == NPY_USHORT: f[0] = 72 #"H" # <<<<<<<<<<<<<< @@ -10310,7 +10303,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":854 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":854 * elif t == NPY_SHORT: f[0] = 104 #"h" * elif t == NPY_USHORT: f[0] = 72 #"H" * elif t == NPY_INT: f[0] = 105 #"i" # <<<<<<<<<<<<<< @@ -10328,7 +10321,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* 
"../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":855 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":855 * elif t == NPY_USHORT: f[0] = 72 #"H" * elif t == NPY_INT: f[0] = 105 #"i" * elif t == NPY_UINT: f[0] = 73 #"I" # <<<<<<<<<<<<<< @@ -10346,7 +10339,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":856 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":856 * elif t == NPY_INT: f[0] = 105 #"i" * elif t == NPY_UINT: f[0] = 73 #"I" * elif t == NPY_LONG: f[0] = 108 #"l" # <<<<<<<<<<<<<< @@ -10364,7 +10357,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":857 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":857 * elif t == NPY_UINT: f[0] = 73 #"I" * elif t == NPY_LONG: f[0] = 108 #"l" * elif t == NPY_ULONG: f[0] = 76 #"L" # <<<<<<<<<<<<<< @@ -10382,7 +10375,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":858 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":858 * elif t == NPY_LONG: f[0] = 108 #"l" * elif t == NPY_ULONG: f[0] = 76 #"L" * elif t == NPY_LONGLONG: f[0] = 113 #"q" # <<<<<<<<<<<<<< @@ -10400,7 +10393,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":859 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":859 * elif t == NPY_ULONG: f[0] = 76 #"L" * elif t == NPY_LONGLONG: f[0] = 113 #"q" * elif t 
== NPY_ULONGLONG: f[0] = 81 #"Q" # <<<<<<<<<<<<<< @@ -10418,7 +10411,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":860 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":860 * elif t == NPY_LONGLONG: f[0] = 113 #"q" * elif t == NPY_ULONGLONG: f[0] = 81 #"Q" * elif t == NPY_FLOAT: f[0] = 102 #"f" # <<<<<<<<<<<<<< @@ -10436,7 +10429,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":861 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":861 * elif t == NPY_ULONGLONG: f[0] = 81 #"Q" * elif t == NPY_FLOAT: f[0] = 102 #"f" * elif t == NPY_DOUBLE: f[0] = 100 #"d" # <<<<<<<<<<<<<< @@ -10454,7 +10447,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":862 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":862 * elif t == NPY_FLOAT: f[0] = 102 #"f" * elif t == NPY_DOUBLE: f[0] = 100 #"d" * elif t == NPY_LONGDOUBLE: f[0] = 103 #"g" # <<<<<<<<<<<<<< @@ -10472,7 +10465,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":863 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":863 * elif t == NPY_DOUBLE: f[0] = 100 #"d" * elif t == NPY_LONGDOUBLE: f[0] = 103 #"g" * elif t == NPY_CFLOAT: f[0] = 90; f[1] = 102; f += 1 # Zf # <<<<<<<<<<<<<< @@ -10492,7 +10485,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* 
"../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":864 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":864 * elif t == NPY_LONGDOUBLE: f[0] = 103 #"g" * elif t == NPY_CFLOAT: f[0] = 90; f[1] = 102; f += 1 # Zf * elif t == NPY_CDOUBLE: f[0] = 90; f[1] = 100; f += 1 # Zd # <<<<<<<<<<<<<< @@ -10512,7 +10505,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":865 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":865 * elif t == NPY_CFLOAT: f[0] = 90; f[1] = 102; f += 1 # Zf * elif t == NPY_CDOUBLE: f[0] = 90; f[1] = 100; f += 1 # Zd * elif t == NPY_CLONGDOUBLE: f[0] = 90; f[1] = 103; f += 1 # Zg # <<<<<<<<<<<<<< @@ -10532,7 +10525,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":866 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":866 * elif t == NPY_CDOUBLE: f[0] = 90; f[1] = 100; f += 1 # Zd * elif t == NPY_CLONGDOUBLE: f[0] = 90; f[1] = 103; f += 1 # Zg * elif t == NPY_OBJECT: f[0] = 79 #"O" # <<<<<<<<<<<<<< @@ -10550,7 +10543,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":868 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":868 * elif t == NPY_OBJECT: f[0] = 79 #"O" * else: * raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t) # <<<<<<<<<<<<<< @@ -10574,7 +10567,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx } __pyx_L15:; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":869 + /* 
"../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":869 * else: * raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t) * f += 1 # <<<<<<<<<<<<<< @@ -10583,7 +10576,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx */ __pyx_v_f = (__pyx_v_f + 1); - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":844 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":844 * offset[0] += child.itemsize * * if not PyDataType_HASFIELDS(child): # <<<<<<<<<<<<<< @@ -10593,7 +10586,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L13; } - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":873 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":873 * # Cython ignores struct boundary information ("T{...}"), * # so don't output it * f = _util_dtypestring(child, f, end, offset) # <<<<<<<<<<<<<< @@ -10606,7 +10599,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx } __pyx_L13:; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":818 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":818 * cdef tuple fields * * for childname in descr.names: # <<<<<<<<<<<<<< @@ -10616,7 +10609,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":874 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":874 * # so don't output it * f = _util_dtypestring(child, f, end, offset) * return f # <<<<<<<<<<<<<< @@ -10626,7 +10619,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx __pyx_r = __pyx_v_f; goto __pyx_L0; - /* 
"../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":809 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":809 * return () * * cdef inline char* _util_dtypestring(dtype descr, char* f, char* end, int* offset) except NULL: # <<<<<<<<<<<<<< @@ -10651,7 +10644,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx return __pyx_r; } -/* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":990 +/* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":990 * * * cdef inline void set_array_base(ndarray arr, object base): # <<<<<<<<<<<<<< @@ -10666,7 +10659,7 @@ static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_a int __pyx_t_2; __Pyx_RefNannySetupContext("set_array_base", 0); - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":992 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":992 * cdef inline void set_array_base(ndarray arr, object base): * cdef PyObject* baseptr * if base is None: # <<<<<<<<<<<<<< @@ -10677,7 +10670,7 @@ static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_a __pyx_t_2 = (__pyx_t_1 != 0); if (__pyx_t_2) { - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":993 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":993 * cdef PyObject* baseptr * if base is None: * baseptr = NULL # <<<<<<<<<<<<<< @@ -10686,7 +10679,7 @@ static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_a */ __pyx_v_baseptr = NULL; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":992 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":992 * cdef inline void set_array_base(ndarray arr, object base): * cdef PyObject* baseptr * if base is None: # 
<<<<<<<<<<<<<< @@ -10696,7 +10689,7 @@ static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_a goto __pyx_L3; } - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":995 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":995 * baseptr = NULL * else: * Py_INCREF(base) # important to do this before decref below! # <<<<<<<<<<<<<< @@ -10706,7 +10699,7 @@ static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_a /*else*/ { Py_INCREF(__pyx_v_base); - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":996 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":996 * else: * Py_INCREF(base) # important to do this before decref below! * baseptr = base # <<<<<<<<<<<<<< @@ -10717,7 +10710,7 @@ static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_a } __pyx_L3:; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":997 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":997 * Py_INCREF(base) # important to do this before decref below! 
* baseptr = base * Py_XDECREF(arr.base) # <<<<<<<<<<<<<< @@ -10726,7 +10719,7 @@ static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_a */ Py_XDECREF(__pyx_v_arr->base); - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":998 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":998 * baseptr = base * Py_XDECREF(arr.base) * arr.base = baseptr # <<<<<<<<<<<<<< @@ -10735,7 +10728,7 @@ static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_a */ __pyx_v_arr->base = __pyx_v_baseptr; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":990 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":990 * * * cdef inline void set_array_base(ndarray arr, object base): # <<<<<<<<<<<<<< @@ -10747,7 +10740,7 @@ static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_a __Pyx_RefNannyFinishContext(); } -/* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1000 +/* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1000 * arr.base = baseptr * * cdef inline object get_array_base(ndarray arr): # <<<<<<<<<<<<<< @@ -10761,7 +10754,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__py int __pyx_t_1; __Pyx_RefNannySetupContext("get_array_base", 0); - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1001 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1001 * * cdef inline object get_array_base(ndarray arr): * if arr.base is NULL: # <<<<<<<<<<<<<< @@ -10771,7 +10764,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__py __pyx_t_1 = ((__pyx_v_arr->base == NULL) != 0); if (__pyx_t_1) { - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1002 + /* 
"../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1002 * cdef inline object get_array_base(ndarray arr): * if arr.base is NULL: * return None # <<<<<<<<<<<<<< @@ -10783,7 +10776,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__py __pyx_r = Py_None; goto __pyx_L0; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1001 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1001 * * cdef inline object get_array_base(ndarray arr): * if arr.base is NULL: # <<<<<<<<<<<<<< @@ -10792,7 +10785,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__py */ } - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1004 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1004 * return None * else: * return arr.base # <<<<<<<<<<<<<< @@ -10806,7 +10799,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__py goto __pyx_L0; } - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1000 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1000 * arr.base = baseptr * * cdef inline object get_array_base(ndarray arr): # <<<<<<<<<<<<<< @@ -10821,7 +10814,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__py return __pyx_r; } -/* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1009 +/* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1009 * # Versions of the import_* functions which are more suitable for * # Cython code. 
* cdef inline int import_array() except -1: # <<<<<<<<<<<<<< @@ -10842,7 +10835,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_array(void) { PyObject *__pyx_t_8 = NULL; __Pyx_RefNannySetupContext("import_array", 0); - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1010 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1010 * # Cython code. * cdef inline int import_array() except -1: * try: # <<<<<<<<<<<<<< @@ -10858,7 +10851,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_array(void) { __Pyx_XGOTREF(__pyx_t_3); /*try:*/ { - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1011 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1011 * cdef inline int import_array() except -1: * try: * _import_array() # <<<<<<<<<<<<<< @@ -10867,7 +10860,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_array(void) { */ __pyx_t_4 = _import_array(); if (unlikely(__pyx_t_4 == ((int)-1))) __PYX_ERR(1, 1011, __pyx_L3_error) - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1010 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1010 * # Cython code. 
* cdef inline int import_array() except -1: * try: # <<<<<<<<<<<<<< @@ -10881,7 +10874,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_array(void) { goto __pyx_L8_try_end; __pyx_L3_error:; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1012 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1012 * try: * _import_array() * except Exception: # <<<<<<<<<<<<<< @@ -10896,7 +10889,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_array(void) { __Pyx_GOTREF(__pyx_t_6); __Pyx_GOTREF(__pyx_t_7); - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1013 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1013 * _import_array() * except Exception: * raise ImportError("numpy.core.multiarray failed to import") # <<<<<<<<<<<<<< @@ -10912,7 +10905,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_array(void) { goto __pyx_L5_except_error; __pyx_L5_except_error:; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1010 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1010 * # Cython code. * cdef inline int import_array() except -1: * try: # <<<<<<<<<<<<<< @@ -10927,7 +10920,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_array(void) { __pyx_L8_try_end:; } - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1009 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1009 * # Versions of the import_* functions which are more suitable for * # Cython code. 
* cdef inline int import_array() except -1: # <<<<<<<<<<<<<< @@ -10950,7 +10943,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_array(void) { return __pyx_r; } -/* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1015 +/* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1015 * raise ImportError("numpy.core.multiarray failed to import") * * cdef inline int import_umath() except -1: # <<<<<<<<<<<<<< @@ -10971,7 +10964,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_umath(void) { PyObject *__pyx_t_8 = NULL; __Pyx_RefNannySetupContext("import_umath", 0); - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1016 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1016 * * cdef inline int import_umath() except -1: * try: # <<<<<<<<<<<<<< @@ -10987,7 +10980,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_umath(void) { __Pyx_XGOTREF(__pyx_t_3); /*try:*/ { - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1017 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1017 * cdef inline int import_umath() except -1: * try: * _import_umath() # <<<<<<<<<<<<<< @@ -10996,7 +10989,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_umath(void) { */ __pyx_t_4 = _import_umath(); if (unlikely(__pyx_t_4 == ((int)-1))) __PYX_ERR(1, 1017, __pyx_L3_error) - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1016 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1016 * * cdef inline int import_umath() except -1: * try: # <<<<<<<<<<<<<< @@ -11010,7 +11003,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_umath(void) { goto __pyx_L8_try_end; __pyx_L3_error:; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1018 + /* 
"../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1018 * try: * _import_umath() * except Exception: # <<<<<<<<<<<<<< @@ -11025,7 +11018,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_umath(void) { __Pyx_GOTREF(__pyx_t_6); __Pyx_GOTREF(__pyx_t_7); - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1019 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1019 * _import_umath() * except Exception: * raise ImportError("numpy.core.umath failed to import") # <<<<<<<<<<<<<< @@ -11041,7 +11034,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_umath(void) { goto __pyx_L5_except_error; __pyx_L5_except_error:; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1016 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1016 * * cdef inline int import_umath() except -1: * try: # <<<<<<<<<<<<<< @@ -11056,7 +11049,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_umath(void) { __pyx_L8_try_end:; } - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1015 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1015 * raise ImportError("numpy.core.multiarray failed to import") * * cdef inline int import_umath() except -1: # <<<<<<<<<<<<<< @@ -11079,7 +11072,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_umath(void) { return __pyx_r; } -/* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1021 +/* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1021 * raise ImportError("numpy.core.umath failed to import") * * cdef inline int import_ufunc() except -1: # <<<<<<<<<<<<<< @@ -11100,7 +11093,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_ufunc(void) { PyObject *__pyx_t_8 = NULL; __Pyx_RefNannySetupContext("import_ufunc", 0); - /* 
"../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1022 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1022 * * cdef inline int import_ufunc() except -1: * try: # <<<<<<<<<<<<<< @@ -11116,7 +11109,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_ufunc(void) { __Pyx_XGOTREF(__pyx_t_3); /*try:*/ { - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1023 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1023 * cdef inline int import_ufunc() except -1: * try: * _import_umath() # <<<<<<<<<<<<<< @@ -11125,7 +11118,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_ufunc(void) { */ __pyx_t_4 = _import_umath(); if (unlikely(__pyx_t_4 == ((int)-1))) __PYX_ERR(1, 1023, __pyx_L3_error) - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1022 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1022 * * cdef inline int import_ufunc() except -1: * try: # <<<<<<<<<<<<<< @@ -11139,7 +11132,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_ufunc(void) { goto __pyx_L8_try_end; __pyx_L3_error:; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1024 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1024 * try: * _import_umath() * except Exception: # <<<<<<<<<<<<<< @@ -11153,7 +11146,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_ufunc(void) { __Pyx_GOTREF(__pyx_t_6); __Pyx_GOTREF(__pyx_t_7); - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1025 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1025 * _import_umath() * except Exception: * raise ImportError("numpy.core.umath failed to import") # <<<<<<<<<<<<<< @@ -11167,7 +11160,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_ufunc(void) { goto __pyx_L5_except_error; 
__pyx_L5_except_error:; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1022 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1022 * * cdef inline int import_ufunc() except -1: * try: # <<<<<<<<<<<<<< @@ -11182,7 +11175,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_ufunc(void) { __pyx_L8_try_end:; } - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1021 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1021 * raise ImportError("numpy.core.umath failed to import") * * cdef inline int import_ufunc() except -1: # <<<<<<<<<<<<<< @@ -11223,7 +11216,7 @@ static PyModuleDef_Slot __pyx_moduledef_slots[] = { static struct PyModuleDef __pyx_moduledef = { PyModuleDef_HEAD_INIT, "doc2vec_inner", - 0, /* m_doc */ + __pyx_k_Optimized_cython_functions_for_t, /* m_doc */ #if CYTHON_PEP489_MULTI_PHASE_INIT 0, /* m_size */ #else @@ -11242,7 +11235,6 @@ static struct PyModuleDef __pyx_moduledef = { #endif static __Pyx_StringTabEntry __pyx_string_tab[] = { - {&__pyx_n_s_FAST_VERSION, __pyx_k_FAST_VERSION, sizeof(__pyx_k_FAST_VERSION), 0, 0, 1, 1}, {&__pyx_kp_u_Format_string_allocated_too_shor, __pyx_k_Format_string_allocated_too_shor, sizeof(__pyx_k_Format_string_allocated_too_shor), 0, 1, 0, 0}, {&__pyx_kp_u_Format_string_allocated_too_shor_2, __pyx_k_Format_string_allocated_too_shor_2, sizeof(__pyx_k_Format_string_allocated_too_shor_2), 0, 1, 0, 0}, {&__pyx_n_s_ImportError, __pyx_k_ImportError, sizeof(__pyx_k_ImportError), 0, 0, 1, 1}, @@ -11345,7 +11337,6 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_vocabulary, __pyx_k_vocabulary, sizeof(__pyx_k_vocabulary), 0, 0, 1, 1}, {&__pyx_n_s_window, __pyx_k_window, sizeof(__pyx_k_window), 0, 0, 1, 1}, {&__pyx_n_s_window_indexes, __pyx_k_window_indexes, sizeof(__pyx_k_window_indexes), 0, 0, 1, 1}, - {&__pyx_n_s_word2vec, __pyx_k_word2vec, sizeof(__pyx_k_word2vec), 0, 0, 
1, 1}, {&__pyx_n_s_word_locks, __pyx_k_word_locks, sizeof(__pyx_k_word_locks), 0, 0, 1, 1}, {&__pyx_n_s_word_locks_2, __pyx_k_word_locks_2, sizeof(__pyx_k_word_locks_2), 0, 0, 1, 1}, {&__pyx_n_s_word_vectors, __pyx_k_word_vectors, sizeof(__pyx_k_word_vectors), 0, 0, 1, 1}, @@ -11358,8 +11349,8 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { }; static int __Pyx_InitCachedBuiltins(void) { __pyx_builtin_ImportError = __Pyx_GetBuiltinName(__pyx_n_s_ImportError); if (!__pyx_builtin_ImportError) __PYX_ERR(0, 21, __pyx_L1_error) - __pyx_builtin_range = __Pyx_GetBuiltinName(__pyx_n_s_range); if (!__pyx_builtin_range) __PYX_ERR(0, 52, __pyx_L1_error) - __pyx_builtin_enumerate = __Pyx_GetBuiltinName(__pyx_n_s_enumerate); if (!__pyx_builtin_enumerate) __PYX_ERR(0, 319, __pyx_L1_error) + __pyx_builtin_range = __Pyx_GetBuiltinName(__pyx_n_s_range); if (!__pyx_builtin_range) __PYX_ERR(0, 46, __pyx_L1_error) + __pyx_builtin_enumerate = __Pyx_GetBuiltinName(__pyx_n_s_enumerate); if (!__pyx_builtin_enumerate) __PYX_ERR(0, 357, __pyx_L1_error) __pyx_builtin_ValueError = __Pyx_GetBuiltinName(__pyx_n_s_ValueError); if (!__pyx_builtin_ValueError) __PYX_ERR(1, 235, __pyx_L1_error) __pyx_builtin_RuntimeError = __Pyx_GetBuiltinName(__pyx_n_s_RuntimeError); if (!__pyx_builtin_RuntimeError) __PYX_ERR(1, 823, __pyx_L1_error) return 0; @@ -11371,49 +11362,49 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__Pyx_InitCachedConstants", 0); - /* "gensim/models/doc2vec_inner.pyx":291 + /* "gensim/models/doc2vec_inner.pyx":329 * cum_table_len = len(model.vocabulary.cum_table) * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_tuple_ = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple_)) __PYX_ERR(0, 291, __pyx_L1_error) + __pyx_tuple_ = 
PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple_)) __PYX_ERR(0, 329, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple_); __Pyx_GIVEREF(__pyx_tuple_); - __pyx_tuple__2 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__2)) __PYX_ERR(0, 291, __pyx_L1_error) + __pyx_tuple__2 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__2)) __PYX_ERR(0, 329, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__2); __Pyx_GIVEREF(__pyx_tuple__2); - /* "gensim/models/doc2vec_inner.pyx":429 + /* "gensim/models/doc2vec_inner.pyx":512 * cum_table_len = len(model.vocabulary.cum_table) * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_tuple__3 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__3)) __PYX_ERR(0, 429, __pyx_L1_error) + __pyx_tuple__3 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__3)) __PYX_ERR(0, 512, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__3); __Pyx_GIVEREF(__pyx_tuple__3); - __pyx_tuple__4 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__4)) __PYX_ERR(0, 429, __pyx_L1_error) + __pyx_tuple__4 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__4)) __PYX_ERR(0, 512, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__4); __Pyx_GIVEREF(__pyx_tuple__4); - /* "gensim/models/doc2vec_inner.pyx":591 + /* "gensim/models/doc2vec_inner.pyx":719 * cum_table_len = len(model.vocabulary.cum_table) * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_tuple__6 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__6)) __PYX_ERR(0, 591, __pyx_L1_error) + 
__pyx_tuple__6 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__6)) __PYX_ERR(0, 719, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__6); __Pyx_GIVEREF(__pyx_tuple__6); - __pyx_tuple__7 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__7)) __PYX_ERR(0, 591, __pyx_L1_error) + __pyx_tuple__7 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__7)) __PYX_ERR(0, 719, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__7); __Pyx_GIVEREF(__pyx_tuple__7); - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":235 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":235 * if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS) * and not PyArray_CHKFLAGS(self, NPY_C_CONTIGUOUS)): * raise ValueError(u"ndarray is not C contiguous") # <<<<<<<<<<<<<< @@ -11424,7 +11415,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__8); __Pyx_GIVEREF(__pyx_tuple__8); - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":239 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":239 * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) * and not PyArray_CHKFLAGS(self, NPY_F_CONTIGUOUS)): * raise ValueError(u"ndarray is not Fortran contiguous") # <<<<<<<<<<<<<< @@ -11435,7 +11426,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__9); __Pyx_GIVEREF(__pyx_tuple__9); - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":276 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":276 * if ((descr.byteorder == c'>' and little_endian) or * (descr.byteorder == c'<' and not little_endian)): * raise ValueError(u"Non-native byte order not supported") # <<<<<<<<<<<<<< @@ -11446,7 +11437,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__10); 
__Pyx_GIVEREF(__pyx_tuple__10); - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":823 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":823 * * if (end - f) - (new_offset - offset[0]) < 15: * raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd") # <<<<<<<<<<<<<< @@ -11457,7 +11448,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__11); __Pyx_GIVEREF(__pyx_tuple__11); - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":827 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":827 * if ((child.byteorder == c'>' and little_endian) or * (child.byteorder == c'<' and not little_endian)): * raise ValueError(u"Non-native byte order not supported") # <<<<<<<<<<<<<< @@ -11468,7 +11459,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__12); __Pyx_GIVEREF(__pyx_tuple__12); - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":847 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":847 * t = child.type_num * if end - f < 5: * raise RuntimeError(u"Format string allocated too short.") # <<<<<<<<<<<<<< @@ -11479,7 +11470,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__13); __Pyx_GIVEREF(__pyx_tuple__13); - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1013 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1013 * _import_array() * except Exception: * raise ImportError("numpy.core.multiarray failed to import") # <<<<<<<<<<<<<< @@ -11490,7 +11481,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__14); __Pyx_GIVEREF(__pyx_tuple__14); - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1019 + /* 
"../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1019 * _import_umath() * except Exception: * raise ImportError("numpy.core.umath failed to import") # <<<<<<<<<<<<<< @@ -11501,7 +11492,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__15); __Pyx_GIVEREF(__pyx_tuple__15); - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1025 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1025 * _import_umath() * except Exception: * raise ImportError("numpy.core.umath failed to import") # <<<<<<<<<<<<<< @@ -11510,41 +11501,41 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__16); __Pyx_GIVEREF(__pyx_tuple__16); - /* "gensim/models/doc2vec_inner.pyx":227 + /* "gensim/models/doc2vec_inner.pyx":221 * * * def train_document_dbow(model, doc_words, doctag_indexes, alpha, work=None, # <<<<<<<<<<<<<< * train_words=False, learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): */ - __pyx_tuple__18 = PyTuple_Pack(50, __pyx_n_s_model, __pyx_n_s_doc_words, __pyx_n_s_doctag_indexes, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_train_words, __pyx_n_s_learn_doctags, __pyx_n_s_learn_words, __pyx_n_s_learn_hidden, __pyx_n_s_word_vectors, __pyx_n_s_word_locks, __pyx_n_s_doctag_vectors, __pyx_n_s_doctag_locks, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_sample, __pyx_n_s_train_words_2, __pyx_n_s_learn_words_2, __pyx_n_s_learn_hidden_2, __pyx_n_s_learn_doctags_2, __pyx_n_s_word_vectors_2, __pyx_n_s_doctag_vectors_2, __pyx_n_s_word_locks_2, __pyx_n_s_doctag_locks_2, __pyx_n_s_work_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_doctag_indexes_2, __pyx_n_s_reduced_windows, __pyx_n_s_document_len, __pyx_n_s_doctag_len, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_r, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, 
__pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_cum_table, __pyx_n_s_cum_table_len, __pyx_n_s_next_random, __pyx_n_s_vlookup, __pyx_n_s_token, __pyx_n_s_predict_word, __pyx_n_s_item, __pyx_n_s_k); if (unlikely(!__pyx_tuple__18)) __PYX_ERR(0, 227, __pyx_L1_error) + __pyx_tuple__18 = PyTuple_Pack(50, __pyx_n_s_model, __pyx_n_s_doc_words, __pyx_n_s_doctag_indexes, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_train_words, __pyx_n_s_learn_doctags, __pyx_n_s_learn_words, __pyx_n_s_learn_hidden, __pyx_n_s_word_vectors, __pyx_n_s_word_locks, __pyx_n_s_doctag_vectors, __pyx_n_s_doctag_locks, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_sample, __pyx_n_s_train_words_2, __pyx_n_s_learn_words_2, __pyx_n_s_learn_hidden_2, __pyx_n_s_learn_doctags_2, __pyx_n_s_word_vectors_2, __pyx_n_s_doctag_vectors_2, __pyx_n_s_word_locks_2, __pyx_n_s_doctag_locks_2, __pyx_n_s_work_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_doctag_indexes_2, __pyx_n_s_reduced_windows, __pyx_n_s_document_len, __pyx_n_s_doctag_len, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_r, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_cum_table, __pyx_n_s_cum_table_len, __pyx_n_s_next_random, __pyx_n_s_vlookup, __pyx_n_s_token, __pyx_n_s_predict_word, __pyx_n_s_item, __pyx_n_s_k); if (unlikely(!__pyx_tuple__18)) __PYX_ERR(0, 221, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__18); __Pyx_GIVEREF(__pyx_tuple__18); - __pyx_codeobj__19 = (PyObject*)__Pyx_PyCode_New(13, 0, 50, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__18, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_gensim_models_doc2vec_inner_pyx, __pyx_n_s_train_document_dbow, 227, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__19)) __PYX_ERR(0, 227, __pyx_L1_error) + __pyx_codeobj__19 = (PyObject*)__Pyx_PyCode_New(13, 0, 50, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, 
__pyx_tuple__18, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_gensim_models_doc2vec_inner_pyx, __pyx_n_s_train_document_dbow, 221, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__19)) __PYX_ERR(0, 221, __pyx_L1_error) - /* "gensim/models/doc2vec_inner.pyx":363 + /* "gensim/models/doc2vec_inner.pyx":401 * * * def train_document_dm(model, doc_words, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< * learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): */ - __pyx_tuple__20 = PyTuple_Pack(53, __pyx_n_s_model, __pyx_n_s_doc_words, __pyx_n_s_doctag_indexes, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_learn_doctags, __pyx_n_s_learn_words, __pyx_n_s_learn_hidden, __pyx_n_s_word_vectors, __pyx_n_s_word_locks, __pyx_n_s_doctag_vectors, __pyx_n_s_doctag_locks, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_sample, __pyx_n_s_learn_doctags_2, __pyx_n_s_learn_words_2, __pyx_n_s_learn_hidden_2, __pyx_n_s_cbow_mean, __pyx_n_s_count, __pyx_n_s_inv_count, __pyx_n_s_word_vectors_2, __pyx_n_s_doctag_vectors_2, __pyx_n_s_word_locks_2, __pyx_n_s_doctag_locks_2, __pyx_n_s_work_2, __pyx_n_s_neu1_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_doctag_indexes_2, __pyx_n_s_reduced_windows, __pyx_n_s_document_len, __pyx_n_s_doctag_len, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_m, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_cum_table, __pyx_n_s_cum_table_len, __pyx_n_s_next_random, __pyx_n_s_vlookup, __pyx_n_s_token, __pyx_n_s_predict_word, __pyx_n_s_item); if (unlikely(!__pyx_tuple__20)) __PYX_ERR(0, 363, __pyx_L1_error) + __pyx_tuple__20 = PyTuple_Pack(53, __pyx_n_s_model, __pyx_n_s_doc_words, __pyx_n_s_doctag_indexes, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_learn_doctags, __pyx_n_s_learn_words, __pyx_n_s_learn_hidden, __pyx_n_s_word_vectors, 
__pyx_n_s_word_locks, __pyx_n_s_doctag_vectors, __pyx_n_s_doctag_locks, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_sample, __pyx_n_s_learn_doctags_2, __pyx_n_s_learn_words_2, __pyx_n_s_learn_hidden_2, __pyx_n_s_cbow_mean, __pyx_n_s_count, __pyx_n_s_inv_count, __pyx_n_s_word_vectors_2, __pyx_n_s_doctag_vectors_2, __pyx_n_s_word_locks_2, __pyx_n_s_doctag_locks_2, __pyx_n_s_work_2, __pyx_n_s_neu1_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_doctag_indexes_2, __pyx_n_s_reduced_windows, __pyx_n_s_document_len, __pyx_n_s_doctag_len, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_m, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_cum_table, __pyx_n_s_cum_table_len, __pyx_n_s_next_random, __pyx_n_s_vlookup, __pyx_n_s_token, __pyx_n_s_predict_word, __pyx_n_s_item); if (unlikely(!__pyx_tuple__20)) __PYX_ERR(0, 401, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__20); __Pyx_GIVEREF(__pyx_tuple__20); - __pyx_codeobj__21 = (PyObject*)__Pyx_PyCode_New(13, 0, 53, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__20, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_gensim_models_doc2vec_inner_pyx, __pyx_n_s_train_document_dm, 363, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__21)) __PYX_ERR(0, 363, __pyx_L1_error) + __pyx_codeobj__21 = (PyObject*)__Pyx_PyCode_New(13, 0, 53, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__20, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_gensim_models_doc2vec_inner_pyx, __pyx_n_s_train_document_dm, 401, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__21)) __PYX_ERR(0, 401, __pyx_L1_error) - /* "gensim/models/doc2vec_inner.pyx":521 + /* "gensim/models/doc2vec_inner.pyx":604 * * * def train_document_dm_concat(model, doc_words, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< * learn_doctags=True, learn_words=True, 
learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): */ - __pyx_tuple__22 = PyTuple_Pack(53, __pyx_n_s_model, __pyx_n_s_doc_words, __pyx_n_s_doctag_indexes, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_learn_doctags, __pyx_n_s_learn_words, __pyx_n_s_learn_hidden, __pyx_n_s_word_vectors, __pyx_n_s_word_locks, __pyx_n_s_doctag_vectors, __pyx_n_s_doctag_locks, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_sample, __pyx_n_s_learn_doctags_2, __pyx_n_s_learn_words_2, __pyx_n_s_learn_hidden_2, __pyx_n_s_word_vectors_2, __pyx_n_s_doctag_vectors_2, __pyx_n_s_word_locks_2, __pyx_n_s_doctag_locks_2, __pyx_n_s_work_2, __pyx_n_s_neu1_2, __pyx_n_s_alpha_2, __pyx_n_s_layer1_size, __pyx_n_s_vector_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_doctag_indexes_2, __pyx_n_s_window_indexes, __pyx_n_s_document_len, __pyx_n_s_doctag_len, __pyx_n_s_window, __pyx_n_s_expected_doctag_len, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_m, __pyx_n_s_n, __pyx_n_s_result, __pyx_n_s_null_word_index, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_cum_table, __pyx_n_s_cum_table_len, __pyx_n_s_next_random, __pyx_n_s_vlookup, __pyx_n_s_token, __pyx_n_s_predict_word); if (unlikely(!__pyx_tuple__22)) __PYX_ERR(0, 521, __pyx_L1_error) + __pyx_tuple__22 = PyTuple_Pack(53, __pyx_n_s_model, __pyx_n_s_doc_words, __pyx_n_s_doctag_indexes, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_learn_doctags, __pyx_n_s_learn_words, __pyx_n_s_learn_hidden, __pyx_n_s_word_vectors, __pyx_n_s_word_locks, __pyx_n_s_doctag_vectors, __pyx_n_s_doctag_locks, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_sample, __pyx_n_s_learn_doctags_2, __pyx_n_s_learn_words_2, __pyx_n_s_learn_hidden_2, __pyx_n_s_word_vectors_2, __pyx_n_s_doctag_vectors_2, __pyx_n_s_word_locks_2, __pyx_n_s_doctag_locks_2, __pyx_n_s_work_2, __pyx_n_s_neu1_2, __pyx_n_s_alpha_2, __pyx_n_s_layer1_size, __pyx_n_s_vector_size, __pyx_n_s_codelens, 
__pyx_n_s_indexes, __pyx_n_s_doctag_indexes_2, __pyx_n_s_window_indexes, __pyx_n_s_document_len, __pyx_n_s_doctag_len, __pyx_n_s_window, __pyx_n_s_expected_doctag_len, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_m, __pyx_n_s_n, __pyx_n_s_result, __pyx_n_s_null_word_index, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_cum_table, __pyx_n_s_cum_table_len, __pyx_n_s_next_random, __pyx_n_s_vlookup, __pyx_n_s_token, __pyx_n_s_predict_word); if (unlikely(!__pyx_tuple__22)) __PYX_ERR(0, 604, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__22); __Pyx_GIVEREF(__pyx_tuple__22); - __pyx_codeobj__23 = (PyObject*)__Pyx_PyCode_New(13, 0, 53, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__22, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_gensim_models_doc2vec_inner_pyx, __pyx_n_s_train_document_dm_concat, 521, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__23)) __PYX_ERR(0, 521, __pyx_L1_error) + __pyx_codeobj__23 = (PyObject*)__Pyx_PyCode_New(13, 0, 53, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__22, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_gensim_models_doc2vec_inner_pyx, __pyx_n_s_train_document_dm_concat, 604, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__23)) __PYX_ERR(0, 604, __pyx_L1_error) __Pyx_RefNannyFinishContext(); return 0; __pyx_L1_error:; @@ -11670,7 +11661,7 @@ static int __pyx_pymod_exec_doc2vec_inner(PyObject *__pyx_pyinit_module) Py_INCREF(__pyx_m); #else #if PY_MAJOR_VERSION < 3 - __pyx_m = Py_InitModule4("doc2vec_inner", __pyx_methods, 0, 0, PYTHON_API_VERSION); Py_XINCREF(__pyx_m); + __pyx_m = Py_InitModule4("doc2vec_inner", __pyx_methods, __pyx_k_Optimized_cython_functions_for_t, 0, PYTHON_API_VERSION); Py_XINCREF(__pyx_m); #else __pyx_m = PyModule_Create(&__pyx_moduledef); #endif @@ -11735,10 +11726,6 @@ static int __pyx_pymod_exec_doc2vec_inner(PyObject *__pyx_pyinit_module) Py_DECREF(__pyx_t_1); 
__pyx_t_1 = 0; /*--- Function import code ---*/ __pyx_t_2 = __Pyx_ImportModule("gensim.models.word2vec_inner"); if (!__pyx_t_2) __PYX_ERR(0, 1, __pyx_L1_error) - if (__Pyx_ImportFunction(__pyx_t_2, "our_dot_double", (void (**)(void))&__pyx_f_6gensim_6models_14word2vec_inner_our_dot_double, "__pyx_t_6gensim_6models_14word2vec_inner_REAL_t (int const *, float const *, int const *, float const *, int const *)") < 0) __PYX_ERR(0, 1, __pyx_L1_error) - if (__Pyx_ImportFunction(__pyx_t_2, "our_dot_float", (void (**)(void))&__pyx_f_6gensim_6models_14word2vec_inner_our_dot_float, "__pyx_t_6gensim_6models_14word2vec_inner_REAL_t (int const *, float const *, int const *, float const *, int const *)") < 0) __PYX_ERR(0, 1, __pyx_L1_error) - if (__Pyx_ImportFunction(__pyx_t_2, "our_dot_noblas", (void (**)(void))&__pyx_f_6gensim_6models_14word2vec_inner_our_dot_noblas, "__pyx_t_6gensim_6models_14word2vec_inner_REAL_t (int const *, float const *, int const *, float const *, int const *)") < 0) __PYX_ERR(0, 1, __pyx_L1_error) - if (__Pyx_ImportFunction(__pyx_t_2, "our_saxpy_noblas", (void (**)(void))&__pyx_f_6gensim_6models_14word2vec_inner_our_saxpy_noblas, "void (int const *, float const *, float const *, int const *, float *, int const *)") < 0) __PYX_ERR(0, 1, __pyx_L1_error) if (__Pyx_ImportFunction(__pyx_t_2, "bisect_left", (void (**)(void))&__pyx_f_6gensim_6models_14word2vec_inner_bisect_left, "unsigned PY_LONG_LONG (__pyx_t_5numpy_uint32_t *, unsigned PY_LONG_LONG, unsigned PY_LONG_LONG, unsigned PY_LONG_LONG)") < 0) __PYX_ERR(0, 1, __pyx_L1_error) if (__Pyx_ImportFunction(__pyx_t_2, "random_int32", (void (**)(void))&__pyx_f_6gensim_6models_14word2vec_inner_random_int32, "unsigned PY_LONG_LONG (unsigned PY_LONG_LONG *)") < 0) __PYX_ERR(0, 1, __pyx_L1_error) Py_DECREF(__pyx_t_2); __pyx_t_2 = 0; @@ -11747,26 +11734,26 @@ static int __pyx_pymod_exec_doc2vec_inner(PyObject *__pyx_pyinit_module) if (__Pyx_patch_abc() < 0) __PYX_ERR(0, 1, __pyx_L1_error) #endif - /* 
"gensim/models/doc2vec_inner.pyx":11 - * + /* "gensim/models/doc2vec_inner.pyx":12 + * """Optimized cython functions for training :class:`~gensim.models.doc2vec.Doc2Vec` model.""" * import cython * import numpy as np # <<<<<<<<<<<<<< * from numpy import zeros, float32 as REAL * cimport numpy as np */ - __pyx_t_3 = __Pyx_Import(__pyx_n_s_numpy, 0, -1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 11, __pyx_L1_error) + __pyx_t_3 = __Pyx_Import(__pyx_n_s_numpy, 0, -1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 12, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_np, __pyx_t_3) < 0) __PYX_ERR(0, 11, __pyx_L1_error) + if (PyDict_SetItem(__pyx_d, __pyx_n_s_np, __pyx_t_3) < 0) __PYX_ERR(0, 12, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/doc2vec_inner.pyx":12 + /* "gensim/models/doc2vec_inner.pyx":13 * import cython * import numpy as np * from numpy import zeros, float32 as REAL # <<<<<<<<<<<<<< * cimport numpy as np * */ - __pyx_t_3 = PyList_New(2); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 12, __pyx_L1_error) + __pyx_t_3 = PyList_New(2); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 13, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_INCREF(__pyx_n_s_zeros); __Pyx_GIVEREF(__pyx_n_s_zeros); @@ -11774,16 +11761,16 @@ static int __pyx_pymod_exec_doc2vec_inner(PyObject *__pyx_pyinit_module) __Pyx_INCREF(__pyx_n_s_float32); __Pyx_GIVEREF(__pyx_n_s_float32); PyList_SET_ITEM(__pyx_t_3, 1, __pyx_n_s_float32); - __pyx_t_4 = __Pyx_Import(__pyx_n_s_numpy, __pyx_t_3, -1); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 12, __pyx_L1_error) + __pyx_t_4 = __Pyx_Import(__pyx_n_s_numpy, __pyx_t_3, -1); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 13, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_ImportFrom(__pyx_t_4, __pyx_n_s_zeros); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 12, __pyx_L1_error) + __pyx_t_3 = __Pyx_ImportFrom(__pyx_t_4, __pyx_n_s_zeros); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 13, 
__pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_zeros, __pyx_t_3) < 0) __PYX_ERR(0, 12, __pyx_L1_error) + if (PyDict_SetItem(__pyx_d, __pyx_n_s_zeros, __pyx_t_3) < 0) __PYX_ERR(0, 13, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_ImportFrom(__pyx_t_4, __pyx_n_s_float32); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 12, __pyx_L1_error) + __pyx_t_3 = __Pyx_ImportFrom(__pyx_t_4, __pyx_n_s_float32); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 13, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_REAL, __pyx_t_3) < 0) __PYX_ERR(0, 12, __pyx_L1_error) + if (PyDict_SetItem(__pyx_d, __pyx_n_s_REAL, __pyx_t_3) < 0) __PYX_ERR(0, 13, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; @@ -11860,7 +11847,7 @@ static int __pyx_pymod_exec_doc2vec_inner(PyObject *__pyx_pyinit_module) * # in scipy > 0.15, fblas function has been removed * import scipy.linalg.blas as fblas # <<<<<<<<<<<<<< * - * from word2vec_inner cimport bisect_left, random_int32, \ + * from word2vec_inner cimport bisect_left, random_int32, sscal, REAL_t, EXP_TABLE, our_dot, our_saxpy */ __pyx_t_10 = PyList_New(1); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 23, __pyx_L4_except_error) __Pyx_GOTREF(__pyx_t_10); @@ -11900,28 +11887,7 @@ static int __pyx_pymod_exec_doc2vec_inner(PyObject *__pyx_pyinit_module) __pyx_L7_try_end:; } - /* "gensim/models/doc2vec_inner.pyx":31 - * our_dot_double, our_dot_float, our_dot_noblas, our_saxpy_noblas - * - * from word2vec import FAST_VERSION # <<<<<<<<<<<<<< - * - * DEF MAX_DOCUMENT_LEN = 10000 - */ - __pyx_t_9 = PyList_New(1); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 31, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_9); - __Pyx_INCREF(__pyx_n_s_FAST_VERSION); - __Pyx_GIVEREF(__pyx_n_s_FAST_VERSION); - PyList_SET_ITEM(__pyx_t_9, 0, __pyx_n_s_FAST_VERSION); - __pyx_t_4 = __Pyx_Import(__pyx_n_s_word2vec, __pyx_t_9, -1); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 31, 
__pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - __pyx_t_9 = __Pyx_ImportFrom(__pyx_t_4, __pyx_n_s_FAST_VERSION); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 31, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_9); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_FAST_VERSION, __pyx_t_9) < 0) __PYX_ERR(0, 31, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - - /* "gensim/models/doc2vec_inner.pyx":35 + /* "gensim/models/doc2vec_inner.pyx":29 * DEF MAX_DOCUMENT_LEN = 10000 * * cdef int ONE = 1 # <<<<<<<<<<<<<< @@ -11930,7 +11896,7 @@ static int __pyx_pymod_exec_doc2vec_inner(PyObject *__pyx_pyinit_module) */ __pyx_v_6gensim_6models_13doc2vec_inner_ONE = 1; - /* "gensim/models/doc2vec_inner.pyx":36 + /* "gensim/models/doc2vec_inner.pyx":30 * * cdef int ONE = 1 * cdef REAL_t ONEF = 1.0 # <<<<<<<<<<<<<< @@ -11939,53 +11905,53 @@ static int __pyx_pymod_exec_doc2vec_inner(PyObject *__pyx_pyinit_module) */ __pyx_v_6gensim_6models_13doc2vec_inner_ONEF = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)1.0); - /* "gensim/models/doc2vec_inner.pyx":227 + /* "gensim/models/doc2vec_inner.pyx":221 * * * def train_document_dbow(model, doc_words, doctag_indexes, alpha, work=None, # <<<<<<<<<<<<<< * train_words=False, learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): */ - __pyx_t_4 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_13doc2vec_inner_1train_document_dbow, NULL, __pyx_n_s_gensim_models_doc2vec_inner); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 227, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_document_dbow, __pyx_t_4) < 0) __PYX_ERR(0, 227, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __pyx_t_9 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_13doc2vec_inner_1train_document_dbow, NULL, __pyx_n_s_gensim_models_doc2vec_inner); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 221, 
__pyx_L1_error) + __Pyx_GOTREF(__pyx_t_9); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_document_dbow, __pyx_t_9) < 0) __PYX_ERR(0, 221, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - /* "gensim/models/doc2vec_inner.pyx":363 + /* "gensim/models/doc2vec_inner.pyx":401 * * * def train_document_dm(model, doc_words, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< * learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): */ - __pyx_t_4 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_13doc2vec_inner_3train_document_dm, NULL, __pyx_n_s_gensim_models_doc2vec_inner); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 363, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_document_dm, __pyx_t_4) < 0) __PYX_ERR(0, 363, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __pyx_t_9 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_13doc2vec_inner_3train_document_dm, NULL, __pyx_n_s_gensim_models_doc2vec_inner); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 401, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_9); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_document_dm, __pyx_t_9) < 0) __PYX_ERR(0, 401, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - /* "gensim/models/doc2vec_inner.pyx":521 + /* "gensim/models/doc2vec_inner.pyx":604 * * * def train_document_dm_concat(model, doc_words, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< * learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): */ - __pyx_t_4 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_13doc2vec_inner_5train_document_dm_concat, NULL, __pyx_n_s_gensim_models_doc2vec_inner); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 521, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_document_dm_concat, __pyx_t_4) < 0) __PYX_ERR(0, 521, __pyx_L1_error) - 
__Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __pyx_t_9 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_13doc2vec_inner_5train_document_dm_concat, NULL, __pyx_n_s_gensim_models_doc2vec_inner); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 604, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_9); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_document_dm_concat, __pyx_t_9) < 0) __PYX_ERR(0, 604, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; /* "gensim/models/doc2vec_inner.pyx":1 * #!/usr/bin/env cython # <<<<<<<<<<<<<< * # cython: boundscheck=False * # cython: wraparound=False */ - __pyx_t_4 = __Pyx_PyDict_NewPresized(0); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 1, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_4) < 0) __PYX_ERR(0, 1, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __pyx_t_9 = __Pyx_PyDict_NewPresized(0); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 1, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_9); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_9) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - /* "../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1021 + /* "../../../../.local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1021 * raise ImportError("numpy.core.umath failed to import") * * cdef inline int import_ufunc() except -1: # <<<<<<<<<<<<<< diff --git a/gensim/models/doc2vec_inner.pyx b/gensim/models/doc2vec_inner.pyx index b41e8a8a3a..f4ed078713 100644 --- a/gensim/models/doc2vec_inner.pyx +++ b/gensim/models/doc2vec_inner.pyx @@ -2,17 +2,17 @@ # cython: boundscheck=False # cython: wraparound=False # cython: cdivision=True +# cython: embedsignature=True # coding: utf-8 # # Copyright (C) 2013 Radim Rehurek # Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html - +"""Optimized cython functions for training :class:`~gensim.models.doc2vec.Doc2Vec` model.""" import cython import numpy as np from numpy 
import zeros, float32 as REAL cimport numpy as np -from libc.math cimport exp from libc.string cimport memset, memcpy # scipy <= 0.15 @@ -22,13 +22,7 @@ except ImportError: # in scipy > 0.15, fblas function has been removed import scipy.linalg.blas as fblas -from word2vec_inner cimport bisect_left, random_int32, \ - scopy, saxpy, sdot, dsdot, snrm2, sscal, \ - REAL_t, EXP_TABLE, \ - our_dot, our_saxpy, \ - our_dot_double, our_dot_float, our_dot_noblas, our_saxpy_noblas - -from word2vec import FAST_VERSION +from word2vec_inner cimport bisect_left, random_int32, sscal, REAL_t, EXP_TABLE, our_dot, our_saxpy DEF MAX_DOCUMENT_LEN = 10000 @@ -227,6 +221,50 @@ cdef unsigned long long fast_document_dmc_neg( def train_document_dbow(model, doc_words, doctag_indexes, alpha, work=None, train_words=False, learn_doctags=True, learn_words=True, learn_hidden=True, word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): + """Update distributed bag of words model ("PV-DBOW") by training on a single document. + + Called internally from :meth:`~gensim.models.doc2vec.Doc2Vec.train` and + :meth:`~gensim.models.doc2vec.Doc2Vec.infer_vector`. + + Parameters + ---------- + model : :class:`~gensim.models.doc2vec.Doc2Vec` + The model to train. + doc_words : list of str + The input document as a list of words to be used for training. Each word will be looked up in + the model's vocabulary. + doctag_indexes : list of int + Indices into `doctag_vectors` used to obtain the tags of the document. + alpha : float + Learning rate. + work : list of float, optional + Updates to be performed on each neuron in the hidden layer of the underlying network. + train_words : bool, optional + Word vectors will be updated exactly as per Word2Vec skip-gram training only if **both** `learn_words` + and `train_words` are set to True. + learn_doctags : bool, optional + Whether the tag vectors should be updated. 
+ learn_words : bool, optional + Word vectors will be updated exactly as per Word2Vec skip-gram training only if **both** + `learn_words` and `train_words` are set to True. + learn_hidden : bool, optional + Whether or not the weights of the hidden layer will be updated. + word_vectors : numpy.ndarray, optional + The vector representation for each word in the vocabulary. If None, these will be retrieved from the model. + word_locks : numpy.ndarray, optional + A learning lock factor for each weight in the hidden layer for words, value 0 completely blocks updates, + a value of 1 allows to update word-vectors. + doctag_vectors : numpy.ndarray, optional + Vector representations of the tags. If None, these will be retrieved from the model. + doctag_locks : numpy.ndarray, optional + The lock factors for each tag, same as `word_locks`, but for document-vectors. + + Returns + ------- + int + Number of words in the input document that were actually used for training. + + """ cdef int hs = model.hs cdef int negative = model.negative cdef int sample = (model.vocabulary.sample != 0) @@ -363,6 +401,51 @@ def train_document_dbow(model, doc_words, doctag_indexes, alpha, work=None, def train_document_dm(model, doc_words, doctag_indexes, alpha, work=None, neu1=None, learn_doctags=True, learn_words=True, learn_hidden=True, word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): + """Update distributed memory model ("PV-DM") by training on a single document. + This method implements the DM model with a projection (input) layer that is either the sum or mean of the context + vectors, depending on the model's `dm_mean` configuration field. + + Called internally from :meth:`~gensim.models.doc2vec.Doc2Vec.train` and + :meth:`~gensim.models.doc2vec.Doc2Vec.infer_vector`. + + Parameters + ---------- + model : :class:`~gensim.models.doc2vec.Doc2Vec` + The model to train. + doc_words : list of str + The input document as a list of words to be used for training. 
Each word will be looked up in + the model's vocabulary. + doctag_indexes : list of int + Indices into `doctag_vectors` used to obtain the tags of the document. + alpha : float + Learning rate. + work : np.ndarray, optional + Private working memory for each worker. + neu1 : np.ndarray, optional + Private working memory for each worker. + learn_doctags : bool, optional + Whether the tag vectors should be updated. + learn_words : bool, optional + Whether the word vectors should be updated. This function takes no `train_words` argument, so that flag + does not apply here. + learn_hidden : bool, optional + Whether or not the weights of the hidden layer will be updated. + word_vectors : numpy.ndarray, optional + The vector representation for each word in the vocabulary. If None, these will be retrieved from the model. + word_locks : numpy.ndarray, optional + A learning lock factor for each weight in the hidden layer for words, value 0 completely blocks updates, + a value of 1 allows to update word-vectors. + doctag_vectors : numpy.ndarray, optional + Vector representations of the tags. If None, these will be retrieved from the model. + doctag_locks : numpy.ndarray, optional + The lock factors for each tag, same as `word_locks`, but for document-vectors. + + Returns + ------- + int + Number of words in the input document that were actually used for training. 
+ + """ cdef int hs = model.hs cdef int negative = model.negative cdef int sample = (model.vocabulary.sample != 0) @@ -521,6 +604,51 @@ def train_document_dm(model, doc_words, doctag_indexes, alpha, work=None, neu1=N def train_document_dm_concat(model, doc_words, doctag_indexes, alpha, work=None, neu1=None, learn_doctags=True, learn_words=True, learn_hidden=True, word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): + """Update distributed memory model ("PV-DM") by training on a single document, using a concatenation of the context + window word vectors (rather than a sum or average). + This might be slower since the input at each batch will be significantly larger. + + Called internally from :meth:`~gensim.models.doc2vec.Doc2Vec.train` and + :meth:`~gensim.models.doc2vec.Doc2Vec.infer_vector`. + + Parameters + ---------- + model : :class:`~gensim.models.doc2vec.Doc2Vec` + The model to train. + doc_words : list of str + The input document as a list of words to be used for training. Each word will be looked up in + the model's vocabulary. + doctag_indexes : list of int + Indices into `doctag_vectors` used to obtain the tags of the document. + alpha : float + Learning rate. + work : np.ndarray, optional + Private working memory for each worker. + neu1 : np.ndarray, optional + Private working memory for each worker. + learn_doctags : bool, optional + Whether the tag vectors should be updated. + learn_words : bool, optional + Whether the word vectors should be updated. This function takes no `train_words` argument, so that flag + does not apply here. + learn_hidden : bool, optional + Whether or not the weights of the hidden layer will be updated. + word_vectors : numpy.ndarray, optional + The vector representation for each word in the vocabulary. If None, these will be retrieved from the model. 
+ word_locks : numpy.ndarray, optional + A learning lock factor for each weight in the hidden layer for words, value 0 completely blocks updates, + a value of 1 allows to update word-vectors. + doctag_vectors : numpy.ndarray, optional + Vector representations of the tags. If None, these will be retrieved from the model. + doctag_locks : numpy.ndarray, optional + The lock factors for each tag, same as `word_locks`, but for document-vectors. + + Returns + ------- + int + Number of words in the input document that were actually used for training. + + """ cdef int hs = model.hs cdef int negative = model.negative cdef int sample = (model.vocabulary.sample != 0) diff --git a/gensim/models/fasttext.py b/gensim/models/fasttext.py index 46e68b44ec..1bc0611479 100644 --- a/gensim/models/fasttext.py +++ b/gensim/models/fasttext.py @@ -6,28 +6,70 @@ # Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html """Learn word representations via fasttext's "skip-gram and CBOW models", using either -hierarchical softmax or negative sampling [1]_. +hierarchical softmax or negative sampling `Enriching Word Vectors with Subword Information +<https://arxiv.org/abs/1607.04606>`_. + + +This module allows training a word embedding from a training corpus with the additional ability to obtain word vectors +for out-of-vocabulary words. + +For a tutorial on gensim's native fasttext, refer to the `notebook +<https://github.com/RaRe-Technologies/gensim/blob/develop/docs/notebooks/FastText_Tutorial.ipynb>`_. Notes ----- -There are more ways to get word vectors in Gensim than just FastText. -See wrappers for VarEmbed and WordRank or Word2Vec +**Make sure you have a C compiler before installing gensim, to use optimized (compiled) fasttext training** -This module allows training a word embedding from a training corpus with the additional ability -to obtain word vectors for out-of-vocabulary words.
+Examples +-------- -For a tutorial on gensim's native fasttext, refer to the noteboook -- [2]_ +Initialize and train a model -**Make sure you have a C compiler before installing gensim, to use optimized (compiled) fasttext training** +>>> from gensim.test.utils import common_texts, get_tmpfile +>>> from gensim.models import FastText +>>> +>>> model = FastText(size=4, window=3, min_count=1) +>>> model.build_vocab(common_texts) +>>> model.train(common_texts, epochs=1, total_examples=model.corpus_count) -.. [1] P. Bojanowski, E. Grave, A. Joulin, T. Mikolov - Enriching Word Vectors with Subword Information. In arXiv preprint arXiv:1607.04606. - https://arxiv.org/abs/1607.04606 +Persist a model to disk with -.. [2] https://github.com/RaRe-Technologies/gensim/blob/develop/docs/notebooks/FastText_Tutorial.ipynb +>>> tmp_fname = get_tmpfile("temp_fasttext.model") +>>> +>>> model.save(tmp_fname) +>>> model = FastText.load(tmp_fname) # you can continue training with the loaded model! -""" +Retrieve word-vector for vocab and out-of-vocab word (this is main feature of current model) + +>>> existent_word = "computer" +>>> computer_vec = model.wv[existent_word] # numpy vector of a word +>>> +>>> oov_word = "graph-out-of-vocab" +>>> oov_vec = model.wv[oov_word] # numpy vector for OOV word + +You can perform various NLP word tasks with the model, some of them are already built-in + +>>> similarities = model.wv.most_similar(positive=['computer', 'human'], negative=['interface']) +>>> most_similar = similarities[0] +>>> +>>> similarities = model.wv.most_similar_cosmul(positive=['computer', 'human'], negative=['interface']) +>>> most_similar = similarities[0] +>>> +>>> not_matching = model.wv.doesnt_match("human computer interface tree".split()) +>>> +>>> sim_score = model.wv.similarity('computer', 'human') + +Correlation with human opinion on word similarity + +>>> from gensim.test.utils import datapath +>>> +>>> similarities = 
model.wv.evaluate_word_pairs(datapath('wordsim353.tsv')) + +And on analogies + +>>> analogies_result = model.wv.accuracy(datapath('questions-words.txt')) +""" import logging import struct @@ -55,26 +97,31 @@ def train_batch_cbow(model, sentences, alpha, work=None, neu1=None): """Update CBOW model by training on a sequence of sentences. - Each sentence is a list of string tokens, which are looked up in the model's - vocab dictionary. Called internally from :meth:`gensim.models.fasttext.FastText.train()`. - This is the non-optimized, Python version. If you have cython installed, gensim - will use the optimized version from fasttext_inner instead. + + Called internally from :meth:`~gensim.models.fasttext.FastText.train`. + + Notes + ----- + This is the non-optimized, Python version. If you have cython installed, gensim will use the optimized version + from :mod:`gensim.models.fasttext_inner` instead. + Parameters ---------- model : :class:`~gensim.models.fasttext.FastText` - `FastText` instance. - sentences : iterable of iterables - Iterable of the sentences directly from disk/network. + Model instance. + sentences : iterable of list of str + Iterable of the sentences. alpha : float Learning rate. - work : :class:`numpy.ndarray` - Private working memory for each worker. - neu1 : :class:`numpy.ndarray` - Private working memory for each worker. + work : :class:`numpy.ndarray`, optional + UNUSED. + neu1 : :class:`numpy.ndarray`, optional + UNUSED. Returns ------- int Effective number of words trained. + """ result = 0 for sentence in sentences: @@ -108,26 +155,32 @@ def train_batch_cbow(model, sentences, alpha, work=None, neu1=None): def train_batch_sg(model, sentences, alpha, work=None, neu1=None): """Update skip-gram model by training on a sequence of sentences. - Each sentence is a list of string tokens, which are looked up in the model's - vocab dictionary. Called internally from :meth:`gensim.models.fasttext.FastText.train()`. 
- This is the non-optimized, Python version. If you have cython installed, gensim - will use the optimized version from fasttext_inner instead. + + Called internally from :meth:`~gensim.models.fasttext.FastText.train`. + + Notes + ----- + This is the non-optimized, Python version. If you have cython installed, gensim will use the optimized version + from :mod:`gensim.models.fasttext_inner` instead. + Parameters ---------- model : :class:`~gensim.models.fasttext.FastText` `FastText` instance. - sentences : iterable of iterables + sentences : iterable of list of str Iterable of the sentences directly from disk/network. alpha : float Learning rate. - work : :class:`numpy.ndarray` - Private working memory for each worker. - neu1 : :class:`numpy.ndarray` - Private working memory for each worker. + work : :class:`numpy.ndarray`, optional + UNUSED. + neu1 : :class:`numpy.ndarray`, optional + UNUSED. + Returns ------- int Effective number of words trained. + """ result = 0 for sentence in sentences: @@ -153,11 +206,32 @@ def train_batch_sg(model, sentences, alpha, work=None, neu1=None): class FastText(BaseWordEmbeddingsModel): """Class for training, using and evaluating word representations learned using method - described in [1]_ aka Fasttext. - - The model can be stored/loaded via its :meth:`~gensim.models.fasttext.FastText.save()` and - :meth:`~gensim.models.fasttext.FastText.load()` methods, or loaded in a format compatible with the original - fasttext implementation via :meth:`~gensim.models.fasttext.FastText.load_fasttext_format()`. + described in `Enriching Word Vectors with Subword Information <https://arxiv.org/abs/1607.04606>`_, aka FastText. + + The model can be stored/loaded via its :meth:`~gensim.models.fasttext.FastText.save` and + :meth:`~gensim.models.fasttext.FastText.load` methods, or loaded in a format compatible with the original + fasttext implementation via :meth:`~gensim.models.fasttext.FastText.load_fasttext_format`.
+ + Some important attributes are the following: + + self.wv : :class:`~gensim.models.keyedvectors.FastTextKeyedVectors` + This object essentially contains the mapping between words and embeddings. These are similar to the embeddings + computed in the Word2Vec model, however here we also include vectors for n-grams. This allows the model to + compute embeddings even for **unseen** words (that do not exist in the vocabulary), as the aggregate of the + n-grams included in the word. After training the model, this attribute can be used directly to query those + embeddings in various ways. Check the module level docstring for some examples. + self.vocabulary : :class:`~gensim.models.fasttext.FastTextVocab` + This object represents the vocabulary (sometimes called Dictionary in gensim) of the model. + Besides keeping track of all unique words, this object provides extra functionality, such as + constructing a huffman tree (frequent words are closer to the root), or discarding extremely rare words. + self.trainables : :class:`~gensim.models.fasttext.FastTextTrainables` + This object represents the inner shallow neural network used to train the embeddings. This is very + similar to the network of the Word2Vec model, but it also trains weights for the N-Grams (sequences of more + than 1 words). The semantics of the network are almost the same as the one used for the Word2Vec model: + You can think of it as a NN with a single projection and hidden layer which we train on the corpus. + The weights are then used as our embeddings. An important difference however between the two models, is the + scoring function used to compute the loss. In the case of FastText, this is modified in order to also account + for the internal structure of words, besides their co-occurrence counts.
""" def __init__(self, sentences=None, sg=0, hs=0, size=100, alpha=0.025, window=5, min_count=5, @@ -169,79 +243,85 @@ def __init__(self, sentences=None, sg=0, hs=0, size=100, alpha=0.025, window=5, Parameters ---------- - sentences : iterable of iterables - The `sentences` iterable can be simply a list of lists of tokens, but for larger corpora, + sentences : iterable of list of str, optional + Can be simply a list of lists of tokens, but for larger corpora, consider an iterable that streams the sentences directly from disk/network. See :class:`~gensim.models.word2vec.BrownCorpus`, :class:`~gensim.models.word2vec.Text8Corpus` or :class:`~gensim.models.word2vec.LineSentence` in :mod:`~gensim.models.word2vec` module for such examples. If you don't supply `sentences`, the model is left uninitialized -- use if you plan to initialize it in some other way. - sg : int {1, 0} + sg : {1, 0}, optional Defines the training algorithm. If 1, skip-gram is used, otherwise, CBOW is employed. - size : int + size : int, optional Dimensionality of the feature vectors. - window : int + window : int, optional The maximum distance between the current and predicted word within a sentence. - alpha : float + alpha : float, optional The initial learning rate. - min_alpha : float + min_alpha : float, optional Learning rate will linearly drop to `min_alpha` as training progresses. - seed : int + seed : int, optional Seed for the random number generator. Initial vectors for each word are seeded with a hash of the concatenation of word + `str(seed)`. Note that for a fully deterministically-reproducible run, you must also limit the model to a single worker thread (`workers=1`), to eliminate ordering jitter from OS thread scheduling. (In Python 3, reproducibility between interpreter launches also requires use of the `PYTHONHASHSEED` environment variable to control hash randomization). - min_count : int - Ignores all words with total frequency lower than this. 
- max_vocab_size : int + min_count : int, optional + The model ignores all words with total frequency lower than this. + max_vocab_size : int, optional Limits the RAM during vocabulary building; if there are more unique words than this, then prune the infrequent ones. Every 10 million word types need about 1GB of RAM. Set to `None` for no limit. - sample : float + sample : float, optional The threshold for configuring which higher-frequency words are randomly downsampled, useful range is (0, 1e-5). - workers : int + workers : int, optional Use these many worker threads to train the model (=faster training with multicore machines). - hs : int {1,0} + hs : {1,0}, optional If 1, hierarchical softmax will be used for model training. If set to 0, and `negative` is non-zero, negative sampling will be used. - negative : int + negative : int, optional If > 0, negative sampling will be used, the int for negative specifies how many "noise words" should be drawn (usually between 5-20). If set to 0, no negative sampling is used. - cbow_mean : int {1,0} + cbow_mean : {1,0}, optional If 0, use the sum of the context word vectors. If 1, use the mean, only applies when cbow is used. - hashfxn : function + hashfxn : function, optional Hash function to use to randomly initialize weights, for increased training reproducibility. - iter : int + iter : int, optional Number of iterations (epochs) over the corpus. - trim_rule : function + trim_rule : function, optional Vocabulary trimming rule, specifies whether certain words should remain in the vocabulary, be trimmed away, or handled using the default (discard if word count < min_count). Can be None (min_count will be used, look to :func:`~gensim.utils.keep_vocab_item`), or a callable that accepts parameters (word, count, min_count) and returns either :attr:`gensim.utils.RULE_DISCARD`, :attr:`gensim.utils.RULE_KEEP` or :attr:`gensim.utils.RULE_DEFAULT`. 
- Note: The rule, if given, is only used to prune vocabulary during build_vocab() and is not stored as part - of the model. - sorted_vocab : int {1,0} - If 1, sort the vocabulary by descending frequency before assigning word indexes. - batch_words : int + The rule, if given, is only used to prune vocabulary during build_vocab() and is not stored as part of the + model. + + The input parameters are of the following types: + * `word` (str) - the word we are examining + * `count` (int) - the word's frequency count in the corpus + * `min_count` (int) - the minimum count threshold. + + sorted_vocab : {1,0}, optional + If 1, sort the vocabulary by descending frequency before assigning word indices. + batch_words : int, optional Target size (in words) for batches of examples passed to worker threads (and thus cython routines).(Larger batches will be passed if individual texts are longer than 10000 words, but the standard cython code truncates to that maximum.) - min_n : int - Min length of char ngrams to be used for training word representations. - max_n : int + min_n : int, optional + Minimum length of char n-grams to be used for training word representations. + max_n : int, optional Max length of char ngrams to be used for training word representations. Set `max_n` to be lesser than `min_n` to avoid char ngrams being used. - word_ngrams : int {1,0} - If 1, uses enriches word vectors with subword(ngrams) information. + word_ngrams : {1,0}, optional + If 1, uses enriches word vectors with subword(n-grams) information. If 0, this is equivalent to word2vec. - bucket : int + bucket : int, optional Character ngrams are hashed into a fixed number of buckets, in order to limit the memory usage of the model. This option specifies the number of buckets used by the model. 
- callbacks : :obj: `list` of :obj: `~gensim.models.callbacks.CallbackAny2Vec` + callbacks : :obj: `list` of :obj: `~gensim.models.callbacks.CallbackAny2Vec`, optional List of callbacks that need to be executed/run at specific stages during training. Examples @@ -255,7 +335,6 @@ def __init__(self, sentences=None, sg=0, hs=0, size=100, alpha=0.025, window=5, >>> say_vector = model['say'] # get vector for word >>> of_vector = model['of'] # get vector for out-of-vocab word - """ self.load = call_on_class_only self.load_fasttext_format = call_on_class_only @@ -333,28 +412,37 @@ def build_vocab(self, sentences, update=False, progress_per=10000, keep_raw_voca Parameters ---------- - sentences : iterable of iterables - The `sentences` iterable can be simply a list of lists of tokens, but for larger corpora, + sentences : iterable of list of str + Can be simply a list of lists of tokens, but for larger corpora, consider an iterable that streams the sentences directly from disk/network. See :class:`~gensim.models.word2vec.BrownCorpus`, :class:`~gensim.models.word2vec.Text8Corpus` or :class:`~gensim.models.word2vec.LineSentence` in :mod:`~gensim.models.word2vec` module for such examples. + update : bool + If true, the new words in `sentences` will be added to model's vocab. + progress_per : int + Indicates how many words to process before showing/updating the progress. keep_raw_vocab : bool If not true, delete the raw vocabulary after the scaling is done and free up RAM. - trim_rule : function + trim_rule : function, optional Vocabulary trimming rule, specifies whether certain words should remain in the vocabulary, be trimmed away, or handled using the default (discard if word count < min_count). Can be None (min_count will be used, look to :func:`~gensim.utils.keep_vocab_item`), or a callable that accepts parameters (word, count, min_count) and returns either :attr:`gensim.utils.RULE_DISCARD`, :attr:`gensim.utils.RULE_KEEP` or :attr:`gensim.utils.RULE_DEFAULT`. 
- Note: The rule, if given, is only used to prune vocabulary during build_vocab() and is not stored as part - of the model. - progress_per : int - Indicates how many words to process before showing/updating the progress. - update : bool - If true, the new words in `sentences` will be added to model's vocab. + The rule, if given, is only used to prune vocabulary during build_vocab() and is not stored as part of the + model. - Example - ------- + The input parameters are of the following types: + * `word` (str) - the word we are examining + * `count` (int) - the word's frequency count in the corpus + * `min_count` (int) - the minimum count threshold. + + **kwargs + Additional key word parameters passed to + :meth:`~gensim.models.base_any2vec.BaseWordEmbeddingsModel.build_vocab`. + + Examples + -------- Train a model and update vocab for online training >>> from gensim.models import FastText @@ -385,6 +473,7 @@ def _set_train_params(self, **kwargs): pass def _clear_post_train(self): + """Clears the model's internal structures after training has finished to free up RAM. """ self.wv.vectors_norm = None self.wv.vectors_vocab_norm = None self.wv.vectors_ngrams_norm = None @@ -432,14 +521,14 @@ def _do_train_job(self, sentences, alpha, inits): Parameters ---------- - sentences : iterable of iterables - The `sentences` iterable can be simply a list of lists of tokens, but for larger corpora, + sentences : iterable of list of str + Can be simply a list of lists of tokens, but for larger corpora, consider an iterable that streams the sentences directly from disk/network. See :class:`~gensim.models.word2vec.BrownCorpus`, :class:`~gensim.models.word2vec.Text8Corpus` or :class:`~gensim.models.word2vec.LineSentence` in :mod:`~gensim.models.word2vec` module for such examples. alpha : float The current learning rate. - inits : (:class:`numpy.ndarray`, :class:`numpy.ndarray`) + inits : tuple of (:class:`numpy.ndarray`, :class:`numpy.ndarray`) Each worker's private work memory. 
Returns @@ -519,20 +608,31 @@ def train(self, sentences, total_examples=None, total_words=None, self.trainables.get_vocab_word_vecs(self.wv) def init_sims(self, replace=False): - """ + """Precompute L2-normalized vectors. + + Notes + ----- init_sims() resides in KeyedVectors because it deals with syn0 mainly, but because syn1 is not an attribute - of KeyedVectors, it has to be deleted in this class, and the normalizing of syn0 happens inside of KeyedVectors + of KeyedVectors, it has to be deleted in this class, and the normalizing of syn0 happens inside of KeyedVectors. + + Parameters + ---------- + replace : bool + If True, forget the original vectors and only keep the normalized ones to save RAM. + """ if replace and hasattr(self.trainables, 'syn1'): del self.trainables.syn1 - return self.wv.init_sims(replace) + self.wv.init_sims(replace) def clear_sims(self): - """ - Removes all L2-normalized vectors for words from the model. + """Removes all L2-normalized vectors for words from the model. + + Notes + ----- You will have to recompute them using init_sims method. - """ + """ self._clear_post_train() @deprecated("Method will be removed in 4.0.0, use self.wv.__getitem__() instead") @@ -553,10 +653,11 @@ def __contains__(self, word): @classmethod def load_fasttext_format(cls, model_file, encoding='utf8'): - """ - Load the input-hidden weight matrix from the fast text output files. + """Load the input-hidden weight matrix from the fast text output files. - Note that due to limitations in the FastText API, you cannot continue training + Notes + ------ + Due to limitations in the FastText API, you cannot continue training with a model loaded this way, though you can query for word similarity etc. Parameters @@ -565,14 +666,14 @@ def load_fasttext_format(cls, model_file, encoding='utf8'): Path to the FastText output files. 
FastText outputs two model files - `/path/to/model.vec` and `/path/to/model.bin` Expected value for this example: `/path/to/model` or `/path/to/model.bin`, - as gensim requires only `.bin` file to load entire fastText model. - encoding : str + as gensim requires only `.bin` file to load the entire fastText model. + encoding : str, optional Specifies the encoding. Returns ------- - :obj: `~gensim.models.fasttext.FastText` - Returns the loaded model as an instance of :class: `~gensim.models.fasttext.FastText`. + :class:`~gensim.models.fasttext.FastText` + The loaded model. """ model = cls() @@ -583,13 +684,28 @@ def load_fasttext_format(cls, model_file, encoding='utf8'): return model def load_binary_data(self, encoding='utf8'): - """Loads data from the output binary file created by FastText training""" + """Loads data from the output binary file created by FastText training. + + Parameters + ---------- + encoding : str, optional + Specifies the encoding. + + """ with utils.smart_open(self.file_name, 'rb') as f: self._load_model_params(f) self._load_dict(f, encoding=encoding) self._load_vectors(f) def _load_model_params(self, file_handle): + """Loads the model's parameters from a file. + + Parameters + ---------- + file_handle : file-like object + Handle to an opened file. + + """ magic, version = self.struct_unpack(file_handle, '@2i') if magic == FASTTEXT_FILEFORMAT_MAGIC: # newer format self.new_format = True @@ -616,6 +732,18 @@ def _load_model_params(self, file_handle): self.vocabulary.sample = t def _load_dict(self, file_handle, encoding='utf8'): + """Loads a previously saved dictionary from disk. + + The dictionary is used to initialize the word vectors. + + Parameters + ---------- + file_handle : file-like object + The opened file handle to the persisted dictionary. + encoding : str, optional + Specifies the encoding.
+ + """ vocab_size, nwords, nlabels = self.struct_unpack(file_handle, '@3i') # Vocab stored by [Dictionary::save](https://github.com/facebookresearch/fastText/blob/master/src/dictionary.cc) if nlabels > 0: @@ -653,6 +781,16 @@ def _load_dict(self, file_handle, encoding='utf8'): self.struct_unpack(file_handle, '@2i') def _load_vectors(self, file_handle): + """Loads the word vectors from disk. + + Parameters + ---------- + file_handle : file-like object + The opened file handle to the persisted dictionary. + encoding : str + Specifies the encoding. + + """ if self.new_format: self.struct_unpack(file_handle, '@?') # bool quant_input in fasttext.cc num_vectors, dim = self.struct_unpack(file_handle, '@2q') @@ -681,6 +819,21 @@ def _load_vectors(self, file_handle): self._clear_post_train() def struct_unpack(self, file_handle, fmt): + """Read as many bytes from the file as `fmt` requires and unpack them as a C struct. + + Parameters + ---------- + file_handle : file-like object + Handle to an open file. + fmt : str + Format string, as understood by the :mod:`struct` module, describing the binary data to read. + + Returns + ------- + tuple + Values unpacked from the bytes read, as returned by :func:`struct.unpack`. + + """ num_bytes = struct.calcsize(fmt) return struct.unpack(fmt, file_handle.read(num_bytes)) @@ -709,8 +862,9 @@ def load(cls, *args, **kwargs): Returns ------- - :obj: `~gensim.models.fasttext.FastText` - Returns the loaded model as an instance of :class: `~gensim.models.fasttext.FastText`. + :class:`~gensim.models.fasttext.FastText` + The loaded model.
+ """ try: model = super(FastText, cls).load(*args, **kwargs) @@ -731,6 +885,7 @@ def accuracy(self, questions, restrict_vocab=30000, most_similar=None, case_inse class FastTextVocab(Word2VecVocab): + """Vocabulary used by :class:`~gensim.models.fasttext.FastText`.""" def __init__(self, max_vocab_size=None, min_count=5, sample=1e-3, sorted_vocab=True, null_word=0): super(FastTextVocab, self).__init__( max_vocab_size=max_vocab_size, min_count=min_count, sample=sample, @@ -745,6 +900,7 @@ def prepare_vocab(self, hs, negative, wv, update=False, keep_raw_vocab=False, tr class FastTextTrainables(Word2VecTrainables): + """Represents the inner shallow neural network used to train :class:`~gensim.models.fasttext.FastText`.""" def __init__(self, vector_size=100, seed=1, hashfxn=hash, bucket=2000000): super(FastTextTrainables, self).__init__( vector_size=vector_size, seed=seed, hashfxn=hashfxn) diff --git a/gensim/models/fasttext_inner.pyx b/gensim/models/fasttext_inner.pyx index ac7cdafbd5..007065ba12 100644 --- a/gensim/models/fasttext_inner.pyx +++ b/gensim/models/fasttext_inner.pyx @@ -244,6 +244,30 @@ cdef void fast_sentence_cbow_hs( def train_batch_sg(model, sentences, alpha, _work, _l1): + """Update skip-gram model by training on a sequence of sentences. + + Each sentence is a list of string tokens, which are looked up in the model's + vocab dictionary. Called internally from :meth:`gensim.models.fasttext.FastText.train()`. + + Parameters + ---------- + model : :class:`~gensim.models.fasttext.FastText` + Model to be trained. + sentences : iterable of list of str + Corpus streamed directly from disk/network. + alpha : float + Learning rate. + _work : np.ndarray, optional + Private working memory for each worker. + _l1 : np.ndarray, optional + Private working memory for each worker. + + Returns + ------- + int + Effective number of words trained. 
+ + """ cdef int hs = model.hs cdef int negative = model.negative cdef int sample = (model.vocabulary.sample != 0) @@ -374,6 +398,29 @@ def train_batch_sg(model, sentences, alpha, _work, _l1): def train_batch_cbow(model, sentences, alpha, _work, _neu1): + """Update the CBOW model by training on a sequence of sentences. + + Each sentence is a list of string tokens, which are looked up in the model's + vocab dictionary. Called internally from :meth:`~gensim.models.fasttext.FastText.train`. + + Parameters + ---------- + model : :class:`~gensim.models.fasttext.FastText` + Model to be trained. + sentences : iterable of list of str + Corpus streamed directly from disk/network. + alpha : float + Learning rate. + _work : np.ndarray + Private working memory for each worker. + _neu1 : np.ndarray + Private working memory for each worker. + Returns + ------- + int + Effective number of words trained. + + """ cdef int hs = model.hs cdef int negative = model.negative cdef int sample = (model.vocabulary.sample != 0) diff --git a/gensim/models/poincare.py b/gensim/models/poincare.py index cb9bce4aba..bb61036e8c 100644 --- a/gensim/models/poincare.py +++ b/gensim/models/poincare.py @@ -6,8 +6,11 @@ # Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html -"""Python implementation of Poincaré Embeddings [1]_, an embedding that is better at capturing latent hierarchical -information than traditional Euclidean embeddings. The method is described in more detail in [1]_. +"""Python implementation of Poincaré Embeddings. + +These embeddings are better at capturing latent hierarchical information than traditional Euclidean embeddings. +The method is described in detail in `Maximilian Nickel, Douwe Kiela - +"Poincaré Embeddings for Learning Hierarchical Representations" <https://arxiv.org/abs/1705.08039>`_.
The main use-case is to automatically learn hierarchical representations of nodes from a tree-like structure, such as a Directed Acyclic Graph, using a transitive closure of the relations. Representations of nodes in a @@ -16,8 +19,6 @@ This module allows training a Poincaré Embedding from a training file containing relations of graph in a csv-like format, or a Python iterable of relations. -.. [1] Maximilian Nickel, Douwe Kiela - "Poincaré Embeddings for Learning Hierarchical Representations" - https://arxiv.org/abs/1705.08039 Examples -------- @@ -73,10 +74,20 @@ class PoincareModel(utils.SaveLoad): and :meth:`~gensim.models.poincare.PoincareModel.load` methods, or stored/loaded in the word2vec format via `model.kv.save_word2vec_format` and :meth:`~gensim.models.poincare.PoincareKeyedVectors.load_word2vec_format`. - Note that training cannot be resumed from a model loaded via `load_word2vec_format`, if you wish to train further, + Notes + ----- + Training cannot be resumed from a model loaded via `load_word2vec_format`, if you wish to train further, use :meth:`~gensim.models.poincare.PoincareModel.save` and :meth:`~gensim.models.poincare.PoincareModel.load` methods instead. + An important attribute (that provides a lot of additional functionality when directly accessed) is the \ + keyed vectors: + + self.kv : :class:`~gensim.models.poincare.PoincareKeyedVectors` + This object essentially contains the mapping between nodes and embeddings, as well as the vocabulary of the model + (set of unique nodes seen by the model). After training, it can be used to perform operations on the vectors \ + such as vector lookup, distance etc. See the documentation of its class for many usage examples.
+ """ def __init__(self, train_data, size=50, alpha=0.1, negative=10, workers=1, epsilon=1e-5, regularization_coeff=1.0, burn_in=10, burn_in_alpha=0.01, init_range=(-0.001, 0.001), dtype=np.float64, seed=0): @@ -84,11 +95,11 @@ def __init__(self, train_data, size=50, alpha=0.1, negative=10, workers=1, epsil Parameters ---------- - train_data : iterable of (str, str) - Iterable of relations, e.g. a list of tuples, or a PoincareRelations instance streaming from a file. - Note that the relations are treated as ordered pairs, i.e. a relation (a, b) does not imply the - opposite relation (b, a). In case the relations are symmetric, the data should contain both relations - (a, b) and (b, a). + train_data : {iterable of (str, str), :class:`gensim.models.poincare.PoincareRelations` + Iterable of relations, e.g. a list of tuples, or a :class:`gensim.models.poincare.PoincareRelations` + instance streaming from a file. Note that the relations are treated as ordered pairs, + i.e. a relation (a, b) does not imply the opposite relation (b, a). In case the relations are symmetric, + the data should contain both relations (a, b) and (b, a). size : int, optional Number of dimensions of the trained model. alpha : float, optional @@ -190,6 +201,7 @@ def _init_embeddings(self): self.kv.syn0 = self._np_random.uniform(self.init_range[0], self.init_range[1], shape).astype(self.dtype) def _init_node_probabilities(self): + """Initialize the a-priori probabilities. """ counts = np.array([ self.kv.vocab[self.kv.index2word[i]].count for i in range(len(self.kv.index2word)) @@ -271,7 +283,7 @@ def _loss_fn(matrix, regularization_coeff=1.0): ---------- matrix : numpy.array Array containing vectors for u, v and negative samples, of shape (2 + negative_size, dim). 
- regularization_coeff : float + regularization_coeff : float, optional Coefficient to use for l2-regularization Returns @@ -305,7 +317,7 @@ def _clip_vectors(vectors, epsilon): Parameters ---------- vectors : numpy.array - Can be 1-D,or 2-D (in which case the norm for each row is checked). + Can be 1-D, or 2-D (in which case the norm for each row is checked). epsilon : float Parameter for numerical stability, each dimension of the vector is reduced by `epsilon` if the norm of the vector is greater than or equal to 1. @@ -334,7 +346,20 @@ def _clip_vectors(vectors, epsilon): return vectors def save(self, *args, **kwargs): - """Save complete model to disk, inherited from :class:`gensim.utils.SaveLoad`.""" + """Save complete model to disk, inherited from :class:`~gensim.utils.SaveLoad`. + + See also + -------- + :meth:`~gensim.models.poincare.PoincareModel.load` + + Parameters + ---------- + *args + Positional arguments passed to :meth:`~gensim.utils.SaveLoad.save`. + **kwargs + Keyword arguments passed to :meth:`~gensim.utils.SaveLoad.save`. + + """ self._loss_grad = None # Can't pickle autograd fn to disk attrs_to_ignore = ['_node_probabilities', '_node_counts_cumsum'] kwargs['ignore'] = set(list(kwargs.get('ignore', [])) + attrs_to_ignore) @@ -342,7 +367,25 @@ def save(self, *args, **kwargs): @classmethod def load(cls, *args, **kwargs): - """Load model from disk, inherited from :class:`~gensim.utils.SaveLoad`.""" + """Load model from disk, inherited from :class:`~gensim.utils.SaveLoad`. + + See also + -------- + :meth:`~gensim.models.poincare.PoincareModel.save` + + Parameters + ---------- + *args + Positional arguments passed to :meth:`~gensim.utils.SaveLoad.load`. + **kwargs + Keyword arguments passed to :meth:`~gensim.utils.SaveLoad.load`. + + Returns + ------- + :class:`~gensim.models.poincare.PoincareModel` + The loaded model. 
+ + """ model = super(PoincareModel, cls).load(*args, **kwargs) model._init_node_probabilities() return model @@ -352,7 +395,6 @@ def _prepare_training_batch(self, relations, all_negatives, check_gradients=Fals Parameters ---------- - relations : list of tuples List of tuples of positive examples of the form (node_1_index, node_2_index). all_negatives : list of lists @@ -390,12 +432,14 @@ def _check_gradients(self, relations, all_negatives, batch, tol=1e-8): Parameters ---------- - batch : PoincareBatch instance - Batch for which computed gradients are to checked. relations : list of tuples List of tuples of positive examples of the form (node_1_index, node_2_index). all_negatives : list of lists List of lists of negative samples for each node_1 in the positive examples. + batch : :class:`~gensim.models.poincare.PoincareBatch` + Batch for which computed gradients are to be checked. + tol : float, optional + The maximum error between our computed gradients and the reference ones from autograd. """ if not AUTOGRAD_PRESENT: @@ -425,7 +469,7 @@ def _sample_negatives_batch(self, nodes): Parameters ---------- - nodes : list + nodes : list of int List of node indices for which negative samples are to be returned. Returns @@ -442,7 +486,7 @@ def _train_on_batch(self, relations, check_gradients=False): Parameters ---------- - relations : list of tuples + relations : list of tuples of (int, int) List of tuples of positive examples of the form (node_1_index, node_2_index). check_gradients : bool, optional Whether to compare the computed gradients to autograd gradients for this batch. @@ -466,7 +510,7 @@ def _handle_duplicates(vector_updates, node_indices): ---------- vector_updates : numpy.array Array with each row containing updates to be performed on a certain node. - node_indices : list + node_indices : list of int Node indices on which the above updates are to be performed on. 
Notes @@ -518,11 +562,11 @@ def train(self, epochs, batch_size=10, print_every=1000, check_gradients_every=N Parameters ---------- - - batch_size : int, optional - Number of examples to train on in a single batch. epochs : int Number of iterations (epochs) over the corpus. + batch_size : int, optional + Number of examples to train on in a single batch. + print_every : int, optional Prints progress and average loss after every `print_every` batches. check_gradients_every : int or None, optional @@ -625,18 +669,16 @@ def __init__(self, vectors_u, vectors_v, indices_u, indices_v, regularization_co Parameters ---------- vectors_u : numpy.array - Vectors of all nodes `u` in the batch. - Expected shape (batch_size, dim). + Vectors of all nodes `u` in the batch. Expected shape (batch_size, dim). vectors_v : numpy.array Vectors of all positively related nodes `v` and negatively sampled nodes `v'`, - for each node `u` in the batch. - Expected shape (1 + neg_size, dim, batch_size). - indices_u : list + for each node `u` in the batch. Expected shape (1 + neg_size, dim, batch_size). + indices_u : list of int List of node indices for each of the vectors in `vectors_u`. - indices_v : list + indices_v : list of lists of int Nested list of lists, each of which is a list of node indices for each of the vectors in `vectors_v` for a specific node `u`. - regularization_coeff : float + regularization_coeff : float, optional Coefficient to use for l2-regularization """ @@ -802,10 +844,18 @@ def word_vec(self, word): Accept a single word as input. Returns the word's representations in vector space, as a 1D numpy array. 
- Example:: + Examples + -------- - >>> trained_model.word_vec('office') - array([ -1.40128313e-02, ...]) + >>> from gensim.test.utils import datapath + >>> + >>> # Read the sample relations file and train the model + >>> relations = PoincareRelations(file_path=datapath('poincare_hypernyms_large.tsv')) + >>> model = PoincareModel(train_data=relations) + >>> model.train(epochs=50) + >>> + >>> # Query the trained model. + >>> wv = model.kv.word_vec('kangaroo.n.01') """ return super(PoincareKeyedVectors, self).get_vector(word) @@ -828,9 +878,16 @@ def words_closer_than(self, w1, w2): Examples -------- - - >>> model.words_closer_than('carnivore.n.01', 'mammal.n.01') - ['dog.n.01', 'canine.n.02'] + >>> from gensim.test.utils import datapath + >>> + >>> # Read the sample relations file and train the model + >>> relations = PoincareRelations(file_path=datapath('poincare_hypernyms_large.tsv')) + >>> model = PoincareModel(train_data=relations) + >>> model.train(epochs=50) + >>> + >>> # Which term is closer to 'kangaroo' than 'metatherian' is to 'kangaroo'? + >>> model.kv.words_closer_than('kangaroo.n.01', 'metatherian.n.01') + [u'marsupial.n.01', u'phalanger.n.01'] """ return super(PoincareKeyedVectors, self).closer_than(w1, w2) @@ -1054,9 +1111,16 @@ def distance(self, w1, w2): Examples -------- - - >>> model.distance('mammal.n.01', 'carnivore.n.01') - 2.13 + >>> from gensim.test.utils import datapath + >>> + >>> # Read the sample relations file and train the model + >>> relations = PoincareRelations(file_path=datapath('poincare_hypernyms_large.tsv')) + >>> model = PoincareModel(train_data=relations) + >>> model.train(epochs=50) + >>> + >>> # What is the distance between the words 'mammal' and 'carnivore'? 
+ >>> model.kv.distance('mammal.n.01', 'carnivore.n.01') + 2.9742298803339304 Notes ----- @@ -1085,9 +1149,16 @@ def similarity(self, w1, w2): Examples -------- - - >>> model.similarity('mammal.n.01', 'carnivore.n.01') - 0.73 + >>> from gensim.test.utils import datapath + >>> + >>> # Read the sample relations file and train the model + >>> relations = PoincareRelations(file_path=datapath('poincare_hypernyms_large.tsv')) + >>> model = PoincareModel(train_data=relations) + >>> model.train(epochs=50) + >>> + >>> # What is the similarity between the words 'mammal' and 'carnivore'? + >>> model.kv.similarity('mammal.n.01', 'carnivore.n.01') + 0.25162107631176484 Notes ----- @@ -1120,8 +1191,16 @@ def most_similar(self, node_or_vector, topn=10, restrict_vocab=None): Examples -------- - >>> vectors.most_similar('lion.n.01') - [('lion_cub.n.01', 0.4484), ('lionet.n.01', 0.6552), ...] + >>> from gensim.test.utils import datapath + >>> + >>> # Read the sample relations file and train the model + >>> relations = PoincareRelations(file_path=datapath('poincare_hypernyms_large.tsv')) + >>> model = PoincareModel(train_data=relations) + >>> model.train(epochs=50) + >>> + >>> # Which words are most similar to 'kangaroo'? + >>> model.kv.most_similar('kangaroo.n.01', topn=2) + [(u'kangaroo.n.01', 0.0), (u'marsupial.n.01', 0.26524229460827725)] """ if not restrict_vocab: @@ -1153,10 +1232,10 @@ def distances(self, node_or_vector, other_nodes=()): Parameters ---------- - node_or_vector : str/int or numpy.array + node_or_vector : {str, int, numpy.array} Node key or vector from which distances are to be computed. - other_nodes : iterable of str/int or None + other_nodes : {iterable of str, iterable of int, None}, optional For each node in `other_nodes` distance from `node_or_vector` is computed. If None or empty, distance of `node_or_vector` from all nodes in vocab is computed (including itself). 
@@ -1168,12 +1247,19 @@ def distances(self, node_or_vector, other_nodes=()): Examples -------- + >>> from gensim.test.utils import datapath + >>> + >>> # Read the sample relations file and train the model + >>> relations = PoincareRelations(file_path=datapath('poincare_hypernyms_large.tsv')) + >>> model = PoincareModel(train_data=relations) + >>> model.train(epochs=50) + >>> + >>> # Check the distances between a word and a list of other words. + >>> model.kv.distances('mammal.n.01', ['carnivore.n.01', 'dog.n.01']) + array([2.97422988, 2.83007402]) - >>> model.distances('mammal.n.01', ['carnivore.n.01', 'dog.n.01']) - np.array([2.1199, 2.0710] - - >>> model.distances('mammal.n.01') - np.array([0.43753847, 3.67973852, ..., 6.66172886]) + >>> # Check the distances between a word and every other word in the vocab. + >>> all_distances = model.kv.distances('mammal.n.01') Notes ----- @@ -1198,7 +1284,7 @@ def norm(self, node_or_vector): Parameters ---------- - node_or_vector : str/int or numpy.array + node_or_vector : {str, int, numpy.array} Input node key or vector for which position in hierarchy is to be returned. Returns @@ -1208,9 +1294,16 @@ def norm(self, node_or_vector): Examples -------- - - >>> model.norm('mammal.n.01') - 0.9 + >>> from gensim.test.utils import datapath + >>> + >>> # Read the sample relations file and train the model + >>> relations = PoincareRelations(file_path=datapath('poincare_hypernyms_large.tsv')) + >>> model = PoincareModel(train_data=relations) + >>> model.train(epochs=50) + >>> + >>> # Get the norm of the embedding of the word `mammal`. + >>> model.kv.norm('mammal.n.01') + 0.6423008703542398 Notes ----- @@ -1230,10 +1323,10 @@ def difference_in_hierarchy(self, node_or_vector_1, node_or_vector_2): Parameters ---------- - node_or_vector_1 : str/int or numpy.array + node_or_vector_1 : {str, int, numpy.array} Input node key or vector. 
- node_or_vector_2 : str/int or numpy.array + node_or_vector_2 : {str, int, numpy.array} Input node key or vector. Returns @@ -1243,12 +1336,18 @@ def difference_in_hierarchy(self, node_or_vector_1, node_or_vector_2): Examples -------- + >>> from gensim.test.utils import datapath + >>> + >>> # Read the sample relations file and train the model + >>> relations = PoincareRelations(file_path=datapath('poincare_hypernyms_large.tsv')) + >>> model = PoincareModel(train_data=relations) + >>> model.train(epochs=50) + >>> + >>> model.kv.difference_in_hierarchy('mammal.n.01', 'dog.n.01') + 0.05382517902410999 - >>> model.difference_in_hierarchy('mammal.n.01', 'dog.n.01') - 0.51 - - >>> model.difference_in_hierarchy('dog.n.01', 'mammal.n.01') - -0.51 + >>> model.kv.difference_in_hierarchy('dog.n.01', 'mammal.n.01') + -0.05382517902410999 Notes ----- @@ -1364,7 +1463,7 @@ def __init__(self, file_path, embedding): ---------- file_path : str Path to tsv file containing relation pairs. - embedding : PoincareKeyedVectors instance + embedding : :class:`~gensim.models.poincare.PoincareKeyedVectors` Embedding to be evaluated. """ @@ -1391,15 +1490,15 @@ def get_positive_relation_ranks_and_avg_prec(all_distances, positive_relations): Parameters ---------- - all_distances : numpy.array (float) + all_distances : numpy.array of float Array of all distances (floats) for a specific item. positive_relations : list List of indices of positive relations for the item. Returns ------- - tuple (list, float) - The list contains ranks (int) of positive relations in the same order as `positive_relations`. + tuple (list of int, float) + The list contains ranks of positive relations in the same order as `positive_relations`. The float is the Average Precision of the ranking. e.g. ([1, 2, 3, 20], 0.610). @@ -1418,12 +1517,12 @@ def evaluate(self, max_n=None): Parameters ---------- - max_n : int or None + max_n : int, optional Maximum number of positive relations to evaluate, all if `max_n` is None. 
Returns ------- - dict + dict of (str, float) Contains (metric_name, metric_value) pairs. e.g. {'mean_rank': 50.3, 'MAP': 0.31}. @@ -1436,12 +1535,12 @@ def evaluate_mean_rank_and_map(self, max_n=None): Parameters ---------- - max_n : int or None + max_n : int, optional Maximum number of positive relations to evaluate, all if `max_n` is None. Returns ------- - tuple (float, float) + tuple of (float, float) Contains (mean_rank, MAP). e.g (50.3, 0.31) @@ -1475,7 +1574,7 @@ def __init__(self, train_path, test_path, embedding): Path to tsv file containing relation pairs used for training. test_path : str Path to tsv file containing relation pairs to evaluate. - embedding : PoincareKeyedVectors instance + embedding : :class:`~gensim.models.poincare.PoincareKeyedVectors` Embedding to be evaluated. """ @@ -1504,17 +1603,17 @@ def get_unknown_relation_ranks_and_avg_prec(all_distances, unknown_relations, kn Parameters ---------- - all_distances : numpy.array (float) + all_distances : numpy.array of float Array of all distances for a specific item. - unknown_relations : list + unknown_relations : list of int List of indices of unknown positive relations. - known_relations : list + known_relations : list of int List of indices of known positive relations. Returns ------- - tuple (list, float) - The list contains ranks (int) of positive relations in the same order as `positive_relations`. + tuple (list of int, float) + The list contains ranks of positive relations in the same order as `positive_relations`. The float is the Average Precision of the ranking. e.g. ([1, 2, 3, 20], 0.610). @@ -1534,12 +1633,12 @@ def evaluate(self, max_n=None): Parameters ---------- - max_n : int or None + max_n : int, optional Maximum number of positive relations to evaluate, all if `max_n` is None. Returns ------- - dict + dict of (str, float) Contains (metric_name, metric_value) pairs. e.g. {'mean_rank': 50.3, 'MAP': 0.31}. 
@@ -1552,7 +1651,7 @@ def evaluate_mean_rank_and_map(self, max_n=None): Parameters ---------- - max_n : int or None + max_n : int, optional Maximum number of positive relations to evaluate, all if `max_n` is None. Returns @@ -1608,7 +1707,7 @@ def score_function(self, embedding, trie, term_1, term_2): Parameters ---------- - embedding : PoincareKeyedVectors instance + embedding : :class:`~gensim.models.poincare.PoincareKeyedVectors` Embedding to use for computing predicted score. trie : pygtrie.Trie instance Trie to use for finding matching vocab terms for input terms. @@ -1655,7 +1754,7 @@ def find_matching_terms(trie, word): Returns ------- - list (str) + list of str List of matching terms. """ @@ -1669,7 +1768,7 @@ def create_vocab_trie(embedding): Parameters ---------- - embedding : PoincareKeyedVectors instance + embedding : :class:`~gensim.models.poincare.PoincareKeyedVectors` Embedding for which trie is to be created. Returns @@ -1694,7 +1793,7 @@ def evaluate_spearman(self, embedding): Parameters ---------- - embedding : PoincareKeyedVectors instance + embedding : :class:`~gensim.models.poincare.PoincareKeyedVectors` Embedding for which evaluation is to be done. Returns diff --git a/gensim/models/word2vec.py b/gensim/models/word2vec.py index 9539aa8d2c..5e11c7e458 100755 --- a/gensim/models/word2vec.py +++ b/gensim/models/word2vec.py @@ -6,10 +6,14 @@ # Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html """Produce word vectors with deep learning via word2vec's "skip-gram and CBOW models", using either -hierarchical softmax or negative sampling [1]_ [2]_. +hierarchical softmax or negative sampling: `Efficient Estimation of Word Representations in Vector Space +<https://arxiv.org/pdf/1301.3781.pdf>`_, `Distributed Representations of Words and Phrases and their Compositionality +<https://arxiv.org/abs/1310.4546>`_. -NOTE: There are more ways to get word vectors in Gensim than just Word2Vec. -See FastText and wrappers for VarEmbed and WordRank.
+Notes +----- +There are more ways to get word vectors in Gensim than just Word2Vec. +See :class:`~gensim.models.fasttext.FastText` and wrappers for VarEmbed and WordRank. The training algorithms were originally ported from the C package https://code.google.com/p/word2vec/ and extended with additional functionality. @@ -18,72 +22,81 @@ visit http://radimrehurek.com/2014/02/word2vec-tutorial/ **Make sure you have a C compiler before installing gensim, to use optimized (compiled) word2vec training** -(70x speedup compared to plain NumPy implementation [3]_). +(70x speedup compared to plain NumPy implementation `Optimizing word2vec in gensim +<http://radimrehurek.com/2013/09/word2vec-in-python-part-two-optimizing/>`_). -Initialize a model with e.g.:: +Examples +-------- - >>> model = Word2Vec(sentences, size=100, window=5, min_count=5, workers=4) +#. Initialize a model with e.g.:: + >>> from gensim.test.utils import common_texts + >>> + >>> model = Word2Vec(size=4, window=2, min_count=1, workers=4) + >>> model.build_vocab(common_texts) -Persist a model to disk with:: +#. Persist a model to disk with:: + >>> model.save("temp_model.w2v") + >>> model = Word2Vec.load("temp_model.w2v") # you can continue training with the loaded model! - >>> model.save(fname) - >>> model = Word2Vec.load(fname) # you can continue training with the loaded model! + The word vectors are stored in a KeyedVectors instance in `model.wv`. + This separates the read-only word vector lookup operations in KeyedVectors from the training code in Word2Vec:: -The word vectors are stored in a KeyedVectors instance in model.wv. -This separates the read-only word vector lookup operations in KeyedVectors from the training code in Word2Vec:: - - >>> model.wv['computer'] # numpy vector of a word - array([-0.00449447, -0.00310097, 0.02421786, ...], dtype=float32) + >>> computer_vec = model.wv['computer'] # numpy vector of a word The word vectors can also be instantiated from an existing file on disk in the word2vec C format as a KeyedVectors instance.
-NOTE: It is impossible to continue training the vectors loaded from the C format because hidden weights, -vocabulary frequency and the binary tree is missing:: +Notes +----- +It is impossible to continue training the vectors loaded from the C format because hidden weights, +vocabulary frequency and the binary tree are missing:: >>> from gensim.models import KeyedVectors - >>> word_vectors = KeyedVectors.load_word2vec_format('/tmp/vectors.txt', binary=False) # C text format - >>> word_vectors = KeyedVectors.load_word2vec_format('/tmp/vectors.bin', binary=True) # C binary format - + >>> + >>> # Save and load key word vectors in C text format. + >>> model.wv.save_word2vec_format('vectors.txt', binary=False) + >>> word_vectors = KeyedVectors.load_word2vec_format('vectors.txt', binary=False) + >>> + >>> # Save and load key word vectors in C binary format. + >>> model.wv.save_word2vec_format('vectors.bin', binary=True) + >>> word_vectors = KeyedVectors.load_word2vec_format('vectors.bin', binary=True) -You can perform various NLP word tasks with the model. Some of them -are already built-in:: +#. You can perform various NLP word tasks with the model. Some of them are already built-in:: - >>> model.wv.most_similar(positive=['woman', 'king'], negative=['man']) - [('queen', 0.50882536), ...] + >>> similarities = model.wv.most_similar(positive=['computer', 'human'], negative=['interface']) + >>> most_similar = similarities[0] - >>> model.wv.most_similar_cosmul(positive=['woman', 'king'], negative=['man']) - [('queen', 0.71382287), ...] + >>> similarities = model.wv.most_similar_cosmul(positive=['computer', 'human'], negative=['interface']) + >>> most_similar = similarities[0] + >>> not_matching = model.wv.doesnt_match("human computer interface tree".split()) - >>> model.wv.doesnt_match("breakfast cereal dinner lunch".split()) - 'cereal' + >>> sim_score = model.wv.similarity('computer', 'human') - >>> model.wv.similarity('woman', 'man') - 0.73723527 +#. 
Probability of a (possibly unseen) text under the model:: -Probability of a text under the model:: + >>> # Note that score is only implemented for the hierarchical softmax scheme. + >>> model = Word2Vec(size=4, window=2, min_count=1, workers=4, hs=1, negative=0) + >>> model.build_vocab(common_texts) + >>> proba = model.score(["The fox jumped over a lazy dog".split()]) - >>> model.score(["The fox jumped over a lazy dog".split()]) - 0.2158356 +#. Correlation with human opinion on word similarity:: -Correlation with human opinion on word similarity:: + >>> from gensim.test.utils import datapath + >>> + >>> similarities = model.wv.evaluate_word_pairs(datapath('wordsim353.tsv')) - >>> model.wv.evaluate_word_pairs(os.path.join(module_path, 'test_data','wordsim353.tsv')) - 0.51, 0.62, 0.13 +#. And on analogies:: -And on analogies:: - - >>> model.wv.evaluate_word_analogies(os.path.join(module_path, 'test_data', 'questions-words.txt'))[0] - 0.58 + >>> analogies = model.wv.accuracy(datapath('questions-words.txt')) and so on. If you're finished training a model (i.e. no more updates, only querying), then switch to the :mod:`gensim.models.KeyedVectors` instance in wv - >>> word_vectors = model.wv - >>> del model + >>> word_vectors = model.wv + >>> del model to trim unneeded model memory = use much less RAM. @@ -91,14 +104,12 @@ detect phrases longer than one word. Using phrases, you can learn a word2vec model where "words" are actually multiword expressions, such as `new_york_times` or `financial_crisis`: - >>> bigram_transformer = gensim.models.Phrases(sentences) - >>> model = Word2Vec(bigram_transformer[sentences], size=100, ...) + >>> from gensim.models import Phrases + >>> + >>> bigram_transformer = Phrases(common_texts) + >>> model = Word2Vec(size=5) + >>> model.build_vocab(bigram_transformer[common_texts]) -.. [1] Tomas Mikolov, Kai Chen, Greg Corrado, and Jeffrey Dean. - Efficient Estimation of Word Representations in Vector Space. 
In Proceedings of Workshop at ICLR, 2013. -.. [2] Tomas Mikolov, Ilya Sutskever, Kai Chen, Greg Corrado, and Jeffrey Dean. - Distributed Representations of Words and Phrases and their Compositionality. In Proceedings of NIPS, 2013. -.. [3] Optimizing word2vec in gensim, http://radimrehurek.com/2013/09/word2vec-in-python-part-two-optimizing/ """ from __future__ import division # py3 "true division" @@ -146,12 +157,34 @@ MAX_WORDS_IN_BATCH = 10000 def train_batch_sg(model, sentences, alpha, work=None, compute_loss=False): - """ - Update skip-gram model by training on a sequence of sentences. - Each sentence is a list of string tokens, which are looked up in the model's - vocab dictionary. Called internally from `Word2Vec.train()`. + """Update skip-gram model by training on a batch of sentences. + + Called internally from :meth:`~gensim.models.word2vec.Word2Vec.train`. + + Notes + ----- This is the non-optimized, Python version. If you have cython installed, gensim - will use the optimized version from word2vec_inner instead. + will use the optimized version found in :mod:`~gensim.models.word2vec_inner` instead. + + Parameters + ---------- + model : :class:`~gensim.models.word2vec.Word2Vec` + The Word2Vec model instance to train. + sentences : iterable of list of str + The corpus used to train the model. + alpha : float + The learning rate + work : object, optional + Unused. + compute_loss : bool, optional + Whether or not the training loss should be computed in this batch. + + Returns + ------- + int + Number of words in the vocabulary actually used for training (They already existed in the vocabulary + and were not discarded by negative sampling). + """ result = 0 for sentence in sentences: @@ -173,12 +206,36 @@ def train_batch_sg(model, sentences, alpha, work=None, compute_loss=False): return result def train_batch_cbow(model, sentences, alpha, work=None, neu1=None, compute_loss=False): - """ - Update CBOW model by training on a sequence of sentences.
- Each sentence is a list of string tokens, which are looked up in the model's - vocab dictionary. Called internally from `Word2Vec.train()`. + """Update CBOW model by training on a batch of sentences. + + Called internally from :meth:`~gensim.models.word2vec.Word2Vec.train`. + + Notes + ----- This is the non-optimized, Python version. If you have cython installed, gensim - will use the optimized version from word2vec_inner instead. + will use the optimized version found in :mod:`~gensim.models.word2vec_inner` instead. + + Parameters + ---------- + model : :class:`~gensim.models.word2vec.Word2Vec` + The Word2Vec model instance to train. + sentences : iterable of list of str + The corpus used to train the model. + alpha : float + The learning rate + work : object, optional + Unused. + neu1 : object, optional + Unused. + compute_loss : bool, optional + Whether or not the training loss should be computed in this batch. + + Returns + ------- + int + Number of words in the vocabulary actually used for training (They already existed in the vocabulary + and were not discarded by negative sampling). + """ result = 0 for sentence in sentences: @@ -197,12 +254,27 @@ def train_batch_cbow(model, sentences, alpha, work=None, neu1=None, compute_loss return result def score_sentence_sg(model, sentence, work=None): - """ - Obtain likelihood score for a single sentence in a fitted skip-gram representaion. - The sentence is a list of Vocab objects (or None, when the corresponding - word is not in the vocabulary). Called internally from `Word2Vec.score()`. + """Obtain likelihood score for a single sentence in a fitted skip-gram representation. + + Notes + ----- This is the non-optimized, Python version. If you have cython installed, gensim - will use the optimized version from word2vec_inner instead. + will use the optimized version found in :mod:`~gensim.models.word2vec_inner` instead. 
+ + Parameters + ---------- + model : :class:`~gensim.models.word2vec.Word2Vec` + The trained model. It **MUST** have been trained using hierarchical softmax and the skip-gram algorithm. + sentence : list of str + The words comprising the sentence to be scored. + work : object, optional + Unused. + + Returns + ------- + float + The probability assigned to this sentence by the Skip-Gram model. + """ log_prob_sentence = 0.0 if model.negative: @@ -223,12 +295,29 @@ def score_sentence_sg(model, sentence, work=None): return log_prob_sentence def score_sentence_cbow(model, sentence, work=None, neu1=None): - """ - Obtain likelihood score for a single sentence in a fitted CBOW representaion. - The sentence is a list of Vocab objects (or None, where the corresponding - word is not in the vocabulary. Called internally from `Word2Vec.score()`. + """Obtain likelihood score for a single sentence in a fitted CBOW representation. + + Notes + ----- This is the non-optimized, Python version. If you have cython installed, gensim - will use the optimized version from word2vec_inner instead. + will use the optimized version found in :mod:`~gensim.models.word2vec_inner` instead. + + Parameters + ---------- + model : :class:`~gensim.models.word2vec.Word2Vec` + The trained model. It **MUST** have been trained using hierarchical softmax and the CBOW algorithm. + sentence : list of str + The words comprising the sentence to be scored. + work : object, optional + Unused. + neu1 : object, optional + Unused. + + Returns + ------- + float + The probability assigned to this sentence by the CBOW model.
+ """ log_prob_sentence = 0.0 if model.negative: @@ -252,6 +341,38 @@ def score_sentence_cbow(model, sentence, work=None, neu1=None): def train_sg_pair(model, word, context_index, alpha, learn_vectors=True, learn_hidden=True, context_vectors=None, context_locks=None, compute_loss=False, is_ft=False): + """Trains the passed model instance on a word and its context, using the Skip-gram algorithm. + + Parameters + ---------- + model : :class:`~gensim.models.word2vec.Word2Vec` + The model to be trained. + word : str + The label (predicted) word. + context_index : list of int + The vocabulary indices of the words in the context. + alpha : float + Learning rate. + learn_vectors : bool, optional + Whether the vectors should be updated. + learn_hidden : bool, optional + Whether the weights of the hidden layer should be updated. + context_vectors : list of list of float, optional + Vector representations of the words in the context. If None, these will be retrieved from the model. + context_locks : list of float, optional + The lock factors for each word in the context. + compute_loss : bool, optional + Whether or not the training loss should be computed. + is_ft : bool, optional + If True, weights will be computed using `model.wv.syn0_vocab` and `model.wv.syn0_ngrams` + instead of `model.wv.syn0` + + Returns + ------- + list of float + Error vector to be back-propagated. + + """ if context_vectors is None: if is_ft: context_vectors_vocab = model.wv.syn0_vocab @@ -328,6 +449,40 @@ def train_sg_pair(model, word, context_index, alpha, learn_vectors=True, learn_h def train_cbow_pair(model, word, input_word_indices, l1, alpha, learn_vectors=True, learn_hidden=True, compute_loss=False, context_vectors=None, context_locks=None, is_ft=False): + """Trains the passed model instance on a word and its context, using the CBOW algorithm. + + Parameters + ---------- + model : :class:`~gensim.models.word2vec.Word2Vec` + The model to be trained. + word : str + The label (predicted) word. 
+ input_word_indices : list of int + The vocabulary indices of the words in the context. + l1 : list of float + Vector representation of the label word. + alpha : float + Learning rate. + learn_vectors : bool, optional + Whether the vectors should be updated. + learn_hidden : bool, optional + Whether the weights of the hidden layer should be updated. + compute_loss : bool, optional + Whether or not the training loss should be computed. + context_vectors : list of list of float, optional + Vector representations of the words in the context. If None, these will be retrieved from the model. + context_locks : list of float, optional + The lock factors for each word in the context. + is_ft : bool, optional + If True, weights will be computed using `model.wv.syn0_vocab` and `model.wv.syn0_ngrams` + instead of `model.wv.syn0` + + Returns + ------- + list of float + Error vector to be back-propagated. + + """ if context_vectors is None: if is_ft: context_vectors_vocab = model.wv.syn0_vocab @@ -396,6 +551,23 @@ def train_cbow_pair(model, word, input_word_indices, l1, alpha, learn_vectors=Tr def score_sg_pair(model, word, word2): + """Score the trained Skip-gram model on a pair of words. + + Parameters + ---------- + model : :class:`~gensim.models.word2vec.Word2Vec` + The trained model. + word : :class:`~gensim.models.keyedvectors.Vocab` + Vocabulary representation of the first word. + word2 : :class:`~gensim.models.keyedvectors.Vocab` + Vocabulary representation of the second word. + + Returns + ------- + float + Logarithm of the sum of exponentiations of input words. + + """ l1 = model.wv.syn0[word2.index] l2a = deepcopy(model.syn1[word.point]) # 2d matrix, codelen x layer1_size sgn = (-1.0) ** word.code # ch function, 0-> 1, 1 -> -1 @@ -404,6 +576,23 @@ def score_sg_pair(model, word, word2): def score_cbow_pair(model, word, l1): + """Score the trained CBOW model on a pair of words. 
+ + Parameters + ---------- + model : :class:`~gensim.models.word2vec.Word2Vec` + The trained model. + word : :class:`~gensim.models.keyedvectors.Vocab` + Vocabulary representation of the first word. + l1 : list of float + Vector representation of the second word. + + Returns + ------- + float + Logarithm of the sum of exponentiations of input words. + + """ l2a = model.syn1[word.point] # 2d matrix, codelen x layer1_size sgn = (-1.0) ** word.code # ch function, 0-> 1, 1 -> -1 lprob = -logaddexp(0, -sgn * dot(l1, l2a.T)) @@ -421,6 +610,24 @@ class Word2Vec(BaseWordEmbeddingsModel): compatible with the original word2vec implementation via `wv.save_word2vec_format()` and `Word2VecKeyedVectors.load_word2vec_format()`. + + Some important attributes are the following: + + self.wv : :class:`~gensim.models.keyedvectors.Word2VecKeyedVectors` + This object essentially contains the mapping between words and embeddings. After training, it can be used + directly to query those embeddings in various ways. See the module level docstring for examples. + + self.vocabulary : :class:`~gensim.models.word2vec.Word2VecVocab` + This object represents the vocabulary (sometimes called Dictionary in gensim) of the model. + Besides keeping track of all unique words, this object provides extra functionality, such as + constructing a huffman tree (frequent words are closer to the root), or discarding extremely rare words. + + self.trainables : :class:`~gensim.models.word2vec.Word2VecTrainables` + This object represents the inner shallow neural network used to train the embeddings. The semantics of the + network differ slightly in the two available training modes (CBOW or SG) but you can think of it as a NN with + a single projection and hidden layer which we train on the corpus. The weights are then used as our embeddings + (which means that the size of the hidden layer is equal to the number of features `self.size`).
+ """ def __init__(self, sentences=None, size=100, alpha=0.025, window=5, min_count=5, @@ -434,14 +641,13 @@ def __init__(self, sentences=None, size=100, alpha=0.025, window=5, min_count=5, Parameters ---------- - sentences : iterable of iterables + sentences : iterable of list of str The `sentences` iterable can be simply a list of lists of tokens, but for larger corpora, consider an iterable that streams the sentences directly from disk/network. See :class:`~gensim.models.word2vec.BrownCorpus`, :class:`~gensim.models.word2vec.Text8Corpus` or :class:`~gensim.models.word2vec.LineSentence` in :mod:`~gensim.models.word2vec` module for such examples. If you don't supply `sentences`, the model is left uninitialized -- use if you plan to initialize it in some other way. - sg : int {1, 0} Defines the training algorithm. If 1, skip-gram is employed; otherwise, CBOW is used. size : int @@ -486,14 +692,20 @@ def __init__(self, sentences=None, size=100, alpha=0.025, window=5, min_count=5, Hash function to use to randomly initialize weights, for increased training reproducibility. iter : int Number of iterations (epochs) over the corpus. - trim_rule : function + trim_rule : function, optional Vocabulary trimming rule, specifies whether certain words should remain in the vocabulary, be trimmed away, or handled using the default (discard if word count < min_count). Can be None (min_count will be used, look to :func:`~gensim.utils.keep_vocab_item`), or a callable that accepts parameters (word, count, min_count) and returns either :attr:`gensim.utils.RULE_DISCARD`, :attr:`gensim.utils.RULE_KEEP` or :attr:`gensim.utils.RULE_DEFAULT`. - Note: The rule, if given, is only used to prune vocabulary during build_vocab() and is not stored as part - of the model. + The rule, if given, is only used to prune vocabulary during build_vocab() and is not stored as part of the + model. 
+ + The input parameters are of the following types: + * `word` (str) - the word we are examining + * `count` (int) - the word's frequency count in the corpus + * `min_count` (int) - the minimum count threshold. + sorted_vocab : int {1,0} If 1, sort the vocabulary by descending frequency before assigning word indexes. batch_words : int @@ -534,9 +746,22 @@ def __init__(self, sentences=None, size=100, alpha=0.025, window=5, min_count=5, fast_version=FAST_VERSION) def _do_train_job(self, sentences, alpha, inits): - """ - Train a single batch of sentences. Return 2-tuple `(effective word count after - ignoring unknown words and sentence length trimming, total word count)`. + """Train the model on a single batch of sentences. + + Parameters + ---------- + sentences : iterable of list of str + Corpus chunk to be used in this training batch. + alpha : float + The learning rate used in this batch. + inits : (np.ndarray, np.ndarray) + Each worker thread's private work memory. + + Returns + ------- + (int, int) + 2-tuple (effective word count after ignoring unknown words and sentence length trimming, total word count). + """ work, neu1 = inits tally = 0 @@ -547,10 +772,24 @@ def _do_train_job(self, sentences, alpha, inits): return tally, self._raw_word_count(sentences) def _clear_post_train(self): - """Resets certain properties of the model, post training.""" + """Removes all L2-normalized vectors for words from the model. + + Notes + ----- + You will have to recompute them using :meth:`~gensim.models.word2vec.Word2Vec.init_sims`. + + """ self.wv.vectors_norm = None def _set_train_params(self, **kwargs): + """If `compute_loss` is passed, then it overrides the value set in the constructor. + + Parameters + ---------- + **kwargs + Keyword model parameters which could include the `compute_loss` boolean.
+ + """ if 'compute_loss' in kwargs: self.compute_loss = kwargs['compute_loss'] self.running_training_loss = 0 @@ -574,29 +813,29 @@ def train(self, sentences, total_examples=None, total_words=None, Parameters ---------- - sentences : iterable of iterables + sentences : iterable of list of str The `sentences` iterable can be simply a list of lists of tokens, but for larger corpora, consider an iterable that streams the sentences directly from disk/network. See :class:`~gensim.models.word2vec.BrownCorpus`, :class:`~gensim.models.word2vec.Text8Corpus` or :class:`~gensim.models.word2vec.LineSentence` in :mod:`~gensim.models.word2vec` module for such examples. - total_examples : int + total_examples : int, optional Count of sentences. - total_words : int + total_words : int, optional Count of raw words in sentences. - epochs : int + epochs : int, optional Number of iterations (epochs) over the corpus. - start_alpha : float + start_alpha : float, optional Initial learning rate. - end_alpha : float + end_alpha : float, optional Final learning rate. Drops linearly from `start_alpha`. - word_count : int + word_count : int, optional Count of words already trained. Set this to 0 for the usual case of training on all words in sentences. - queue_factor : int + queue_factor : int, optional Multiplier for size of queue (number of workers * queue_factor). - report_delay : float + report_delay : float, optional Seconds to wait before reporting progress. - compute_loss: bool + compute_loss: bool, optional If True, computes and stores loss value which can be retrieved using `model.get_latest_training_loss()`. callbacks : :obj: `list` of :obj: `~gensim.models.callbacks.CallbackAny2Vec` List of callbacks that need to be executed/run at specific stages during training. 
@@ -609,6 +848,7 @@ def train(self, sentences, total_examples=None, total_words=None, >>> model = Word2Vec(min_count=1) >>> model.build_vocab(sentences) >>> model.train(sentences, total_examples=model.corpus_count, epochs=model.iter) + (1, 30) """ @@ -623,32 +863,30 @@ def score(self, sentences, total_sentences=int(1e6), chunksize=100, queue_factor This does not change the fitted model in any way (see Word2Vec.train() for that). We have currently only implemented score for the hierarchical softmax scheme, - so you need to have run word2vec with hs=1 and negative=0 for this to work. + so you need to have run word2vec with `hs = 1` and `negative = 0` for this to work. Note that you should specify total_sentences; we'll run into problems if you ask to score more than this number of sentences but it is inefficient to set the value too high. - See the article by [#taddy]_ and the gensim demo at [#deepir]_ for examples of + See the `article by Matt Taddy: "Document Classification by Inversion of Distributed Language Representations" + <https://arxiv.org/pdf/1504.07295.pdf>`_ and the + `gensim demo <https://github.com/piskvorky/gensim/blob/develop/docs/notebooks/deepir.ipynb>`_ for examples of how to use such scores in document classification. - .. [#taddy] Taddy, Matt. Document Classification by Inversion of Distributed Language Representations, - in Proceedings of the 2015 Conference of the Association of Computational Linguistics. - .. [#deepir] https://github.com/piskvorky/gensim/blob/develop/docs/notebooks/deepir.ipynb - Parameters ---------- - sentences : iterable of iterables + sentences : iterable of list of str The `sentences` iterable can be simply a list of lists of tokens, but for larger corpora, consider an iterable that streams the sentences directly from disk/network. See :class:`~gensim.models.word2vec.BrownCorpus`, :class:`~gensim.models.word2vec.Text8Corpus` or :class:`~gensim.models.word2vec.LineSentence` in :mod:`~gensim.models.word2vec` module for such examples. - total_sentences : int + total_sentences : int, optional Count of sentences.
- chunksize : int + chunksize : int, optional Chunksize of jobs - queue_factor : int + queue_factor : int, optional Multiplier for size of queue (number of workers * queue_factor). - report_delay : float + report_delay : float, optional Seconds to wait before reporting progress. """ @@ -757,29 +995,37 @@ def worker_loop(): def clear_sims(self): """Removes all L2-normalized vectors for words from the model. - You will have to recompute them using init_sims method. - """ + Notes + ----- + You will have to recompute them using :meth:`~gensim.models.word2vec.Word2Vec.init_sims`. + + """ self.wv.vectors_norm = None def intersect_word2vec_format(self, fname, lockf=0.0, binary=False, encoding='utf8', unicode_errors='strict'): - """Merge the input-hidden weight matrix from the original C word2vec-tool format - given, where it intersects with the current vocabulary. (No words are added to the - existing vocabulary, but intersecting words adopt the file's weights, and - non-intersecting words are left alone.) + """Merge the input-hidden weight matrix from the original C word2vec-tool format given, + where it intersects with the current vocabulary. + + Notes + ----- + No words are added to the existing vocabulary, but intersecting words adopt the file's weights, and + non-intersecting words are left alone. Parameters ---------- fname : str The file path used to save the vectors in - - binary : bool - If True, the data wil be saved in binary word2vec format, else it will be saved in plain text. - - lockf : float + lockf : float, optional Lock-factor value to be set for any imported word-vectors; the default value of 0.0 prevents further updating of the vector during subsequent training. Use 1.0 to allow further training updates of merged vectors. + binary : bool, optional + If True, the data will be saved in binary word2vec format, else it will be saved in plain text. + encoding : str, optional + Encoding of `text` for `unicode` function (python2 only).
+ unicode_errors : str, optional + Error handling behaviour, used as parameter for `unicode` function (python2 only). """ overlap_count = 0 @@ -824,6 +1070,7 @@ def __getitem__(self, words): """ Deprecated. Use self.wv.__getitem__() instead. Refer to the documentation for `gensim.models.keyedvectors.Word2VecKeyedVectors.__getitem__` + """ return self.wv.__getitem__(words) @@ -832,6 +1079,7 @@ def __contains__(self, word): """ Deprecated. Use self.wv.__contains__() instead. Refer to the documentation for `gensim.models.keyedvectors.Word2VecKeyedVectors.__contains__` + """ return self.wv.__contains__(word) @@ -841,14 +1089,14 @@ def predict_output_word(self, context_words_list, topn=10): Parameters ---------- - context_words_list : :obj: `list` of :obj: `str` - List of context words - topn: int + context_words_list : list of str + List of context words. + topn: int, optional Return `topn` words and their probabilities Returns ------- - :obj: `list` of :obj: `tuple` + list of tuple of (str, float) `topn` length list of tuples of (word, probability) """ @@ -880,18 +1128,40 @@ def predict_output_word(self, context_words_list, topn=10): return [(self.wv.index2word[index1], prob_values[index1]) for index1 in top_indices] def init_sims(self, replace=False): - """ - init_sims() resides in KeyedVectors because it deals with syn0/vectors mainly, but because syn1 is not an + """Precompute L2-normalized vectors. + + Parameters + ---------- + replace : bool, optional + If True, forget the original vectors and only keep the normalized ones. This will save a lot of RAM. + + Notes + ----- + `init_sims` resides in KeyedVectors because it deals with syn0/vectors mainly, but because syn1 is not an attribute of KeyedVectors, it has to be deleted in this class, and the normalizing of syn0/vectors happens - inside of KeyedVectors + inside of KeyedVectors. 
+ """ if replace and hasattr(self.trainables, 'syn1'): del self.trainables.syn1 - return self.wv.init_sims(replace) + self.wv.init_sims(replace) def reset_from(self, other_model): - """Borrow shareable pre-built structures (like vocab) from the other_model. Useful - if testing multiple models in parallel on the same corpus. + """Borrow shareable pre-built structures from the other_model and reset hidden layer weights. + + Structures copied are: + * Vocabulary + * Index to word mapping + * Cum table (used for negative sampling) + * Cached Corpus length + + Useful if testing multiple models in parallel on the same corpus. + + Parameters + ---------- + other_model : :class:`~gensim.models.word2vec.Word2Vec` + Another model from where internal structures will be copied. + """ self.wv.vocab = other_model.wv.vocab self.wv.index2word = other_model.wv.index2word @@ -901,7 +1171,17 @@ def reset_from(self, other_model): @staticmethod def log_accuracy(section): - return Word2VecKeyedVectors.log_accuracy(section) + """Logs the model's accuracy scored on a single section of a corpus. + + Static wrapper for :meth:`~gensim.models.keyedvectors.Word2VecKeyedVectors.log_accuracy`. + + Parameters + ---------- + section : iterable of list of str + Chunk of sentences used to score the model's accuracy. + + """ + Word2VecKeyedVectors.log_accuracy(section) @deprecated("Method will be removed in 4.0.0, use self.wv.evaluate_word_analogies() instead") def accuracy(self, questions, restrict_vocab=30000, most_similar=None, case_insensitive=True): @@ -909,14 +1189,28 @@ def accuracy(self, questions, restrict_vocab=30000, most_similar=None, case_inse return self.wv.accuracy(questions, restrict_vocab, most_similar, case_insensitive) def __str__(self): + """Human readable representation of the model's state. + + Returns + ------- + str + Human readable representation of the model's state (vocabulary size, vector size and learning rate).
+ + """ return "%s(vocab=%s, size=%s, alpha=%s)" % ( self.__class__.__name__, len(self.wv.index2word), self.wv.vector_size, self.alpha ) def delete_temporary_training_data(self, replace_word_vectors_with_normalized=False): - """Discard parameters that are used in training and score. Use if you're sure you're done training a model. - If `replace_word_vectors_with_normalized` is set, forget the original vectors and only keep the normalized - ones = saves lots of memory! + """Discard parameters that are used in training and score. + + Use if you're sure you're done training a model. + + Parameters + ---------- + replace_word_vectors_with_normalized : bool, optional + If True, forget the original vectors and only keep the normalized ones to save RAM. + """ if replace_word_vectors_with_normalized: self.init_sims(replace=True) @@ -937,6 +1231,14 @@ def save(self, *args, **kwargs): super(Word2Vec, self).save(*args, **kwargs) def get_latest_training_loss(self): + """Getter for the current value of the training loss. + + Returns + ------- + float + Current training loss. + + """ return self.running_training_loss @deprecated( @@ -966,7 +1268,8 @@ def save_word2vec_format(self, fname, fvocab=None, binary=False): @classmethod def load(cls, *args, **kwargs): - """Loads a previously saved `Word2Vec` model. Also see `save()`. + """Loads a previously saved :class:`~gensim.models.word2vec.Word2Vec` model. + Also see :meth:`~gensim.models.word2vec.Word2Vec.save`. Parameters ---------- @@ -975,8 +1278,9 @@ def load(cls, *args, **kwargs): Returns ------- - :obj: `~gensim.models.word2vec.Word2Vec` - Returns the loaded model as an instance of :class: `~gensim.models.word2vec.Word2Vec`. + :class:`~gensim.models.word2vec.Word2Vec` + Loaded model. + """ try: model = super(Word2Vec, cls).load(*args, **kwargs) @@ -1046,8 +1350,7 @@ def __iter__(self): class LineSentence(object): - """Simple format: one sentence = one line; words already preprocessed and separated by whitespace.
- """ + """Simple format: one sentence = one line; words already preprocessed and separated by whitespace.""" def __init__(self, source, max_sentence_length=MAX_WORDS_IN_BATCH, limit=None): """ diff --git a/gensim/models/word2vec_inner.pyx b/gensim/models/word2vec_inner.pyx index 98e719c6d4..7d8ae26208 100755 --- a/gensim/models/word2vec_inner.pyx +++ b/gensim/models/word2vec_inner.pyx @@ -66,12 +66,45 @@ cdef void our_saxpy_noblas(const int *N, const float *alpha, const float *X, con for i from 0 <= i < N[0] by 1: Y[i * (incY[0])] = (alpha[0]) * X[i * (incX[0])] + Y[i * (incY[0])] - cdef void fast_sentence_sg_hs( const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen, REAL_t *syn0, REAL_t *syn1, const int size, const np.uint32_t word2_index, const REAL_t alpha, REAL_t *work, REAL_t *word_locks, const int _compute_loss, REAL_t *_running_training_loss_param) nogil: + """Train on a single effective word from the current batch, using the Skip-Gram model. + + In this model we are using a given word to predict a context word (a word that is + close to the one we are using as training). Hierarchical softmax is used to speed-up + training. + + Parameters + ---------- + word_point + Vector representation of the current word. + word_code + ASCII (char == uint8) representation of the current word. + codelen + Number of characters (length) in the current word. + syn0 + Embeddings for the words in the vocabulary (`model.wv.vectors`) + syn1 + Weights of the hidden layer in the model's trainable neural network. + size + Length of the embeddings. + word2_index + Index of the context word in the vocabulary. + alpha + Learning rate. + work + Private working memory for each worker. + word_locks + Lock factors for each word. A value of 0 will block training. + _compute_loss + Whether or not the loss should be computed at this step. + _running_training_loss_param + Running loss, used to debug or inspect how training progresses. 
+ + """ cdef long long a, b cdef long long row1 = word2_index * size, row2, sgn @@ -124,7 +157,49 @@ cdef unsigned long long fast_sentence_sg_neg( const np.uint32_t word2_index, const REAL_t alpha, REAL_t *work, unsigned long long next_random, REAL_t *word_locks, const int _compute_loss, REAL_t *_running_training_loss_param) nogil: + """Train on a single effective word from the current batch, using the Skip-Gram model. + + In this model we are using a given word to predict a context word (a word that is + close to the one we are using as training). Negative sampling is used to speed-up + training. + + Parameters + ---------- + negative + Number of negative words to be sampled. + cum_table + Cumulative-distribution table using stored vocabulary word counts for + drawing random words (with a negative label). + cum_table_len + Length of the `cum_table` + syn0 + Embeddings for the words in the vocabulary (`model.wv.vectors`) + syn1neg + Weights of the hidden layer in the model's trainable neural network. + size + Length of the embeddings. + word_index + Index of the current training word in the vocabulary. + word2_index + Index of the context word in the vocabulary. + alpha + Learning rate. + work + Private working memory for each worker. + next_random + Seed to produce the index for the next word to be randomly sampled. + word_locks + Lock factors for each word. A value of 0 will block training. + _compute_loss + Whether or not the loss should be computed at this step. + _running_training_loss_param + Running loss, used to debug or inspect how training progresses. + + Returns + ------- + Seed to draw the training word for the next iteration of the same routine. 
+ """ cdef long long a cdef long long row1 = word2_index * size, row2 cdef unsigned long long modulo = 281474976710655ULL @@ -173,7 +248,50 @@ cdef void fast_sentence_cbow_hs( const np.uint32_t indexes[MAX_SENTENCE_LEN], const REAL_t alpha, REAL_t *work, int i, int j, int k, int cbow_mean, REAL_t *word_locks, const int _compute_loss, REAL_t *_running_training_loss_param) nogil: + """Train on a single effective word from the current batch, using the CBOW method. + + Using this method we train the trainable neural network by attempting to predict a + given word by its context (words surrounding the one we are trying to predict). + Hierarchical softmax method is used to speed-up training. + + Parameters + ---------- + word_point + Vector representation of the current word. + word_code + ASCII (char == uint8) representation of the current word. + codelens + Number of characters (length) for all words in the context. + neu1 + Private working memory for every worker. + syn0 + Embeddings for the words in the vocabulary (`model.wv.vectors`) + syn1 + Weights of the hidden layer in the model's trainable neural network. + size + Length of the embeddings. + word2_index + Index of the context word in the vocabulary. + alpha + Learning rate. + work + Private working memory for each worker. + i + Index of the word to be predicted from the context. + j + Index of the word at the beginning of the context window. + k + Index of the word at the end of the context window. + cbow_mean + If 0, use the sum of the context word vectors as the prediction. If 1, use the mean. + word_locks + Lock factors for each word. A value of 0 will block training. + _compute_loss + Whether or not the loss should be computed at this step. + _running_training_loss_param + Running loss, used to debug or inspect how training progresses. 
+ """ cdef long long a, b cdef long long row2, sgn cdef REAL_t f, g, count, inv_count = 1.0, f_dot, lprob @@ -228,7 +346,55 @@ cdef unsigned long long fast_sentence_cbow_neg( const np.uint32_t indexes[MAX_SENTENCE_LEN], const REAL_t alpha, REAL_t *work, int i, int j, int k, int cbow_mean, unsigned long long next_random, REAL_t *word_locks, const int _compute_loss, REAL_t *_running_training_loss_param) nogil: + """Train on a single effective word from the current batch, using the CBOW method. + + Using this method we train the trainable neural network by attempting to predict a + given word by its context (words surrounding the one we are trying to predict). + Negative sampling is used to speed-up training. + + Parameters + ---------- + negative + Number of negative words to be sampled. + cum_table + Cumulative-distribution table using stored vocabulary word counts for + drawing random words (with a negative label). + cum_table_len + Length of the `cum_table` + codelens + Number of characters (length) for all words in the context. + neu1 + Private working memory for every worker. + syn0 + Embeddings for the words in the vocabulary (`model.wv.vectors`) + syn1neg + Weights of the hidden layer in the model's trainable neural network. + size + Length of the embeddings. + indexes + Indexes of the context words in the vocabulary. + alpha + Learning rate. + work + Private working memory for each worker. + i + Index of the word to be predicted from the context. + j + Index of the word at the beginning of the context window. + k + Index of the word at the end of the context window. + cbow_mean + If 0, use the sum of the context word vectors as the prediction. If 1, use the mean. + next_random + Seed for the drawing the predicted word for the next iteration of the same routine. + word_locks + Lock factors for each word. A value of 0 will block training. + _compute_loss + Whether or not the loss should be computed at this step. 
+ _running_training_loss_param + Running loss, used to debug or inspect how training progresses. + """ cdef long long a cdef long long row2 cdef unsigned long long modulo = 281474976710655ULL @@ -294,11 +460,35 @@ cdef unsigned long long fast_sentence_cbow_neg( def train_batch_sg(model, sentences, alpha, _work, compute_loss): + """Update skip-gram model by training on a batch of sentences. + + Called internally from :meth:`~gensim.models.word2vec.Word2Vec.train`. + + Parameters + ---------- + model : :class:`~gensim.models.word2vec.Word2Vec` + The Word2Vec model instance to train. + sentences : iterable of list of str + The corpus used to train the model. + alpha : float + The learning rate. + _work : np.ndarray + Private working memory for each worker. + compute_loss : bool + Whether or not the training loss should be computed in this batch. + + Returns + ------- + int + Number of words in the vocabulary actually used for training (They already existed in the vocabulary + and were not discarded by negative sampling). + + """ cdef int hs = model.hs cdef int negative = model.negative cdef int sample = (model.vocabulary.sample != 0) - cdef int _compute_loss = (1 if compute_loss == True else 0) + cdef int _compute_loss = (1 if compute_loss else 0) cdef REAL_t _running_training_loss = model.running_training_loss cdef REAL_t *syn0 = (np.PyArray_DATA(model.wv.vectors)) @@ -401,6 +591,31 @@ def train_batch_sg(model, sentences, alpha, _work, compute_loss): def train_batch_cbow(model, sentences, alpha, _work, _neu1, compute_loss): + """Update CBOW model by training on a batch of sentences. + + Called internally from :meth:`~gensim.models.word2vec.Word2Vec.train`. + + Parameters + ---------- + model : :class:`~gensim.models.word2vec.Word2Vec` + The Word2Vec model instance to train. + sentences : iterable of list of str + The corpus used to train the model. + alpha : float + The learning rate. + _work : np.ndarray + Private working memory for each worker.
+ _neu1 : np.ndarray + Private working memory for each worker. + compute_loss : bool + Whether or not the training loss should be computed in this batch. + + Returns + ------- + int + Number of words in the vocabulary actually used for training (They already existed in the vocabulary + and were not discarded by negative sampling). + """ cdef int hs = model.hs cdef int negative = model.negative cdef int sample = (model.vocabulary.sample != 0) @@ -506,8 +721,29 @@ def train_batch_cbow(model, sentences, alpha, _work, _neu1, compute_loss): return effective_words -# Score is only implemented for hierarchical softmax def score_sentence_sg(model, sentence, _work): + """Obtain likelihood score for a single sentence in a fitted skip-gram representation. + + Notes + ----- + This scoring function is only implemented for hierarchical softmax (`model.hs == 1`). + The model should have been trained using the skip-gram model (`model.sg == 1`). + + Parameters + ---------- + model : :class:`~gensim.models.word2vec.Word2Vec` + The trained model. It **MUST** have been trained using hierarchical softmax and the skip-gram algorithm. + sentence : list of str + The words comprising the sentence to be scored. + _work : np.ndarray + Private working memory for each worker. + + Returns + ------- + float + The probability assigned to this sentence by the Skip-Gram model. + + """ cdef REAL_t *syn0 = (np.PyArray_DATA(model.wv.vectors)) cdef REAL_t *work @@ -586,7 +822,30 @@ cdef void score_pair_sg_hs( work[0] += f def score_sentence_cbow(model, sentence, _work, _neu1): + """Obtain likelihood score for a single sentence in a fitted CBOW representation. + + Notes + ----- + This scoring function is only implemented for hierarchical softmax (`model.hs == 1`). + The model should have been trained using the CBOW model (`model.sg == 0`). + + Parameters + ---------- + model : :class:`~gensim.models.word2vec.Word2Vec` + The trained model.
It **MUST** have been trained using hierarchical softmax and the CBOW algorithm. + sentence : list of str + The words comprising the sentence to be scored. + _work : np.ndarray + Private working memory for each worker. + _neu1 : np.ndarray + Private working memory for each worker. + + Returns + ------- + float + The probability assigned to this sentence by the CBOW model. + """ cdef int cbow_mean = model.cbow_mean cdef REAL_t *syn0 = (np.PyArray_DATA(model.wv.vectors)) @@ -685,6 +944,13 @@ def init(): Precompute function `sigmoid(x) = 1 / (1 + exp(-x))`, for x values discretized into table EXP_TABLE. Also calculate log(sigmoid(x)) into LOG_TABLE. + Returns + ------- + {0, 1, 2} + Enumeration to signify underlying data type returned by the BLAS dot product calculation. + 0 signifies double, 1 signifies float, and 2 signifies that custom cython loops were used + instead of BLAS. + """ global our_dot global our_saxpy