diff --git a/CHANGELOG.txt b/CHANGELOG.txt index dfeb253cb8..9b0ac43a84 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -1,12 +1,26 @@ Changes ======= +NEXT -0.12.2 +* Make show_topics return value consistent across models (Christopher Corley, #448) + - All models with the `show_topics` method should return a list of + `(topic_number, topic)` tuples, where `topic` is a list of + `(word, probability)` tuples. + - This is a breaking change that affects users of the `LsiModel`, `LdaModel`, + and `LdaMulticore` that may be reliant on the old tuple layout of + `(probability, word)`. +0.12.2, 19/09/2015 + +* tutorial on text summarization (Ólavur Mortensen, #436) +* more flexible vocabulary construction in word2vec & doc2vec (Philipp Dowling, #434) * added support for sliced TransformedCorpus objects, so that after applying (for instance) TfidfModel the returned corpus remains randomly indexable. (Matti Lyra, #425) * changed the LdaModel.save so that a custom `ignore` list can be passed in (Matti Lyra, #331) * added support for NumPy style fancy indexing to corpus objects (Matti Lyra, #414) +* py3k fix in distributed LSI (spacecowboy, #433) +* Windows fix for setup.py (#428) +* fix compatibility for scipy 0.16.0 (#415) 0.12.1, 20/07/2015 diff --git a/README.rst b/README.rst index ab4914e54c..bd212f329c 100644 --- a/README.rst +++ b/README.rst @@ -19,7 +19,7 @@ Target audience is the *natural language processing* (NLP) and *information retr Features --------- -* All algorithms are **memory-independent** w.r.t. the corpus size (can process input larger than RAM), +* All algorithms are **memory-independent** w.r.t. 
the corpus size (can process input larger than RAM, streamed, out-of-core), * **Intuitive interfaces** * easy to plug in your own input corpus/datastream (trivial streaming API) diff --git a/docs/notebooks/summarization_tutorial.ipynb b/docs/notebooks/summarization_tutorial.ipynb new file mode 100644 index 0000000000..6ef81218bf --- /dev/null +++ b/docs/notebooks/summarization_tutorial.ipynb @@ -0,0 +1,406 @@ +{ + "metadata": { + "name": "", + "signature": "sha256:6b9b76544213a02f8bf906cdada222aa43d1d502664b11cd363728bc96c21b5f" + }, + "nbformat": 3, + "nbformat_minor": 0, + "worksheets": [ + { + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "

Tutorial: automatic summarization using Gensim

\n", + "\n", + "This module automatically summarizes the given text, by extracting one or more important sentences from the text. In a similar way, it can also extract keywords. This tutorial will teach you to use this summarization module via some examples. First, we will try a small example, then we will try two larger ones, and then we will review the performance of the summarizer in terms of speed.\n", + "\n", + "This summarizer is based on the \"TextRank\" algorithm, from an [article](http://web.eecs.umich.edu/%7Emihalcea/papers/mihalcea.emnlp04.pdf) by Mihalcea et al. This algorithm was later improved upon by Barrios et al. in another [article](https://raw.githubusercontent.com/summanlp/docs/master/articulo/articulo-en.pdf), by introducing something called a \"BM25 ranking function\". \n", + "\n", + "This tutorial assumes that you are familiar with Python and have [installed Gensim](http://radimrehurek.com/gensim/install.html).\n", + "\n", + "Note: Gensim's summarization only works for English for now, because the text is pre-processed so that stopwords are removed and the words are stemmed, and these processes are language-dependent.\n", + "\n", + "\n", + "

Small example

\n", + "\n", + "First of all, we import the function \"summarize\"." + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "import logging\n", + "logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)\n", + "\n", + "from gensim.summarization import summarize" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 4 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will try summarizing a small toy example; later we will use a larger piece of text. In reality, the text is too small, but it suffices as an illustrative example." + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "text = \"Thomas A. Anderson is a man living two lives. By day he is an \" + \\\n", + " \"average computer programmer and by night a hacker known as \" + \\\n", + " \"Neo. Neo has always questioned his reality, but the truth is \" + \\\n", + " \"far beyond his imagination. Neo finds himself targeted by the \" + \\\n", + " \"police when he is contacted by Morpheus, a legendary computer \" + \\\n", + " \"hacker branded a terrorist by the government. Morpheus awakens \" + \\\n", + " \"Neo to the real world, a ravaged wasteland where most of \" + \\\n", + " \"humanity have been captured by a race of machines that live \" + \\\n", + " \"off of the humans' body heat and electrochemical energy and \" + \\\n", + " \"who imprison their minds within an artificial reality known as \" + \\\n", + " \"the Matrix. As a rebel against the machines, Neo must return to \" + \\\n", + " \"the Matrix and confront the agents: super-powerful computer \" + \\\n", + " \"programs devoted to snuffing out Neo and the entire human \" + \\\n", + " \"rebellion. \"\n", + "\n", + "print 'Input text:'\n", + "print text" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "Input text:\n", + "Thomas A. 
Anderson is a man living two lives. By day he is an average computer programmer and by night a hacker known as Neo. Neo has always questioned his reality, but the truth is far beyond his imagination. Neo finds himself targeted by the police when he is contacted by Morpheus, a legendary computer hacker branded a terrorist by the government. Morpheus awakens Neo to the real world, a ravaged wasteland where most of humanity have been captured by a race of machines that live off of the humans' body heat and electrochemical energy and who imprison their minds within an artificial reality known as the Matrix. As a rebel against the machines, Neo must return to the Matrix and confront the agents: super-powerful computer programs devoted to snuffing out Neo and the entire human rebellion. \n" + ] + } + ], + "prompt_number": 5 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To summarize this text, we pass the raw string data as input to the function \"summarize\", and it will return a summary.\n", + "\n", + "Note: make sure that the string does not contain any newlines where the line breaks in a sentence. A sentence with a newline in it (i.e. a carriage return, \"\\n\") will be treated as two sentences." + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "print 'Summary:'\n", + "print summarize(text)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "Summary:\n", + "By day he is an average computer programmer and by night a hacker known as Neo. Neo has always questioned his reality, but the truth is far beyond his imagination.\n" + ] + } + ], + "prompt_number": 6 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Use the \"split\" option if you want a list of strings instead of a single string." 
+ ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "print summarize(text, split=True)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "['By day he is an average computer programmer and by night a hacker known as Neo. Neo has always questioned his reality, but the truth is far beyond his imagination.']\n" + ] + } + ], + "prompt_number": 6 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can adjust how much text the summarizer outputs via the \"ratio\" parameter or the \"word_count\" parameter. Using the \"ratio\" parameter, you specify what fraction of sentences in the original text should be returned as output. Below we specify that we want 50% of the original text (the default is 20%)." + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "print 'Summary:'\n", + "print summarize(text, ratio=0.5)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "Summary:\n", + "By day he is an average computer programmer and by night a hacker known as Neo. Neo has always questioned his reality, but the truth is far beyond his imagination.\n", + "Neo finds himself targeted by the police when he is contacted by Morpheus, a legendary computer hacker branded a terrorist by the government.\n", + "Morpheus awakens Neo to the real world, a ravaged wasteland where most of humanity have been captured by a race of machines that live off of the humans' body heat and electrochemical energy and who imprison their minds within an artificial reality known as the Matrix.\n" + ] + } + ], + "prompt_number": 8 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Using the \"word_count\" parameter, we specify the maximum amount of words we want in the summary. Below we have specified that we want no more than 50 words." 
+ ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "print 'Summary:'\n", + "print summarize(text, word_count=50)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "Summary:\n", + "By day he is an average computer programmer and by night a hacker known as Neo. Neo has always questioned his reality, but the truth is far beyond his imagination.\n" + ] + } + ], + "prompt_number": 9 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As mentioned earlier, this module also supports keyword extraction. Keyword extraction works in the same way as summary generation (i.e. sentence extraction), in that the algorithm tries to find words that are important or seem representative of the entire text. The keywords are not always single words; in the case of multi-word keywords, they are typically all nouns." + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "from gensim.summarization import keywords\n", + "\n", + "print 'Keywords:'\n", + "print keywords(text)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "Keywords:\n", + "humanity\n", + "human\n", + "neo\n", + "humans body\n", + "super\n", + "hacker\n", + "reality\n" + ] + } + ], + "prompt_number": 10 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "

Larger example

\n", + "\n", + "Let us try an example with a larger piece of text. We will be using a synopsis of the movie \"The Matrix\", which we have taken from [this](http://www.imdb.com/title/tt0133093/synopsis?ref_=ttpl_pl_syn) IMDb page.\n", + "\n", + "In the code below, we read the text file directly from a web-page using \"requests\". Then we produce a summary and some keywords." + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "import requests\n", + "\n", + "text = requests.get('http://rare-technologies.com/the_matrix_synopsis.txt').text\n", + "\n", + "print 'Summary:'\n", + "print summarize(text, ratio=0.01)\n", + "\n", + "print '\\nKeywords:'\n", + "print keywords(text, ratio=0.01)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "Summary:\n", + "Anderson, a software engineer for a Metacortex, the other life as Neo, a computer hacker \"guilty of virtually every computer crime we have a law for.\" Agent Smith asks him to help them capture Morpheus, a dangerous terrorist, in exchange for amnesty.\n", + "Morpheus explains that he's been searching for Neo his entire life and asks if Neo feels like \"Alice in Wonderland, falling down the rabbit hole.\" He explains to Neo that they exist in the Matrix, a false reality that has been constructed for humans to hide the truth.\n", + "Neo is introduced to Morpheus's crew including Trinity; Apoc (Julian Arahanga), a man with long, flowing black hair; Switch; Cypher (bald with a goatee); two brawny brothers, Tank (Marcus Chong) and Dozer (Anthony Ray Parker); and a young, thin man named Mouse (Matt Doran).\n", + "Cypher cuts up a juicy steak and ruminates that he knows the steak is merely the simulation telling his brain that it is delicious and juicy, but after nine years he has discovered that \"ignorance is bliss.\" He strikes a deal for the machines to reinsert his body into a power plant, reinsert him into the Matrix, and 
he'll help the Agents." + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "\n", + "Keywords:\n", + "neo\n", + "morpheus\n", + "trinity\n", + "cypher\n", + "agents\n", + "agent\n", + "smith\n", + "tank\n", + "says\n", + "saying" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n" + ] + } + ], + "prompt_number": 12 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you know this movie, you see that this summary is actually quite good. We also see that some of the most important characters (Neo, Morpheus, Trinity) were extracted as keywords.\n", + "\n", + "

Another example

\n", + "\n", + "Let's try an example similar to the one above. This time, we will use the [IMDb synopsis](http://www.imdb.com/title/tt0118715/synopsis?ref_=tt_stry_pl) of \"The Big Lebowski\".\n", + "\n", + "Again, we download the text and produce a summary and some keywords." + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "import requests\n", + "\n", + "text = requests.get('http://rare-technologies.com/the_big_lebowski_synopsis.txt').text\n", + "\n", + "print 'Summary:'\n", + "print summarize(text, ratio=0.01)\n", + "\n", + "print '\\nKeywords:'\n", + "print keywords(text, ratio=0.01)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "Summary:\n", + "The answering machine records a woman introducing herself as Maude Lebowski and saying that she is the one who took his rug and has sent a car to pick Dude up at his apartment.\n", + "As he climbs out of bed to make a White Russian, Maude asks about the apartment and Dude explains that Treehorn's thugs most likely vandalized it looking for Lebowski's money." + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "\n", + "Keywords:\n", + "dude\n", + "dudes\n", + "walter\n", + "lebowski\n", + "brandt\n", + "maude\n", + "donny\n", + "bunny" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n" + ] + } + ], + "prompt_number": 13 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This time around, the summary is not of high quality, as it does not tell us much about the movie. In a way, this might not be the algorithm's fault; rather, this text simply doesn't contain one or two sentences that capture the essence of the text as in \"The Matrix\" synopsis.\n", + "\n", + "The keywords, however, managed to find some of the main characters.\n", + "\n", + "

Performance

\n", + "\n", + "We will test how the speed of the summarizer scales with the size of the dataset. These tests were run on an Intel Core i5 4210U CPU @ 1.70 GHz x 4 processor. Note that the summarizer does not support multithreading (parallel processing).\n", + "\n", + "The tests were run on the book \"Honest Abe\" by Alonzo Rothschild. Download the book in plain-text here. \n", + "\n", + "In the plot below, we see the running times together with the sizes of the datasets. To create datasets of different sizes, we have simply taken prefixes of text; in other words we take the first n characters of the book. The algorithm seems to be quadratic in time, so one needs to be careful before plugging a large dataset into the summarizer.\n", + "\n", + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "

Text-content dependent running times

\n", + "\n", + "The running time is not only dependent on the size of the dataset. For example, summarizing \"The Matrix\" synopsis (about 36,000 characters) takes about 3.1 seconds, while summarizing 35,000 characters of this book takes about 8.5 seconds. So the former is more than twice as fast. \n", + "\n", + "One reason for this difference in running times is the data structure that is used. The algorithm represents the data using a graph, where vertices (nodes) are sentences, and then constructs weighted edges between the vertices that represent how the sentences relate to each other. This means that every piece of text will have a different graph, thus making the running times different. The size of this data structure is quadratic in the worst case (the worst case is when each vertex has an edge to every other vertex).\n", + "\n", + "Another possible reason for the difference in running times is that the problems converge at different rates, meaning that the error drops slower for some datasets than for others.\n" + ] + } + ], + "metadata": {} + } + ] +} \ No newline at end of file diff --git a/docs/src/conf.py b/docs/src/conf.py index 2a3885f048..3ee17afd78 100644 --- a/docs/src/conf.py +++ b/docs/src/conf.py @@ -52,9 +52,9 @@ # built documents. # # The short X.Y version. -version = '0.12.1' +version = '0.12.2' # The full version, including alpha/beta/rc tags. -release = '0.12.1' +release = '0.12.2' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. 
diff --git a/gensim/models/doc2vec.py b/gensim/models/doc2vec.py index 1b143d92ad..b0c31da313 100644 --- a/gensim/models/doc2vec.py +++ b/gensim/models/doc2vec.py @@ -390,8 +390,9 @@ def init_sims(self, replace=False): self.doctag_syn0norm = self.doctag_syn0 else: if self.mapfile_path: - self.doctag_syn0norm = np_memmap(self.mapfile_path+'.doctag_syn0norm', dtype=REAL, - mode='w+', shape=self.doctag_syn0.shape) + self.doctag_syn0norm = np_memmap( + self.mapfile_path+'.doctag_syn0norm', dtype=REAL, + mode='w+', shape=self.doctag_syn0.shape) else: self.doctag_syn0norm = empty(self.doctag_syn0.shape, dtype=REAL) np_divide(self.doctag_syn0, sqrt((self.doctag_syn0 ** 2).sum(-1))[..., newaxis], self.doctag_syn0norm) @@ -418,10 +419,14 @@ def most_similar(self, positive=[], negative=[], topn=10, clip_start=0, clip_end positive = [positive] # add weights for each doc, if not already present; default to 1.0 for positive and -1.0 for negative docs - positive = [(doc, 1.0) if isinstance(doc, string_types + (ndarray,) + integer_types) - else doc for doc in positive] - negative = [(doc, -1.0) if isinstance(doc, string_types + (ndarray,) + integer_types) - else doc for doc in negative] + positive = [ + (doc, 1.0) if isinstance(doc, string_types + (ndarray,) + integer_types) + else doc for doc in positive + ] + negative = [ + (doc, -1.0) if isinstance(doc, string_types + (ndarray,) + integer_types) + else doc for doc in negative + ] # compute the weighted average of all docs all_docs, mean = set(), [] @@ -499,7 +504,7 @@ class Doc2Vec(Word2Vec): def __init__(self, documents=None, size=300, alpha=0.025, window=8, min_count=5, max_vocab_size=None, sample=0, seed=1, workers=1, min_alpha=0.0001, dm=1, hs=1, negative=0, dbow_words=0, dm_mean=0, dm_concat=0, dm_tag_count=1, - docvecs=None, docvecs_mapfile=None, comment=None, **kwargs): + docvecs=None, docvecs_mapfile=None, comment=None, trim_rule=None, **kwargs): """ Initialize the model from an iterable of `documents`. 
Each document is a TaggedDocument object that will be used for training. @@ -553,6 +558,13 @@ def __init__(self, documents=None, size=300, alpha=0.025, window=8, min_count=5, `dbow_words` if set to 1 trains word-vectors (in skip-gram fashion) simultaneous with DBOW doc-vector training; default is 0 (faster training of doc-vectors only). + `trim_rule` = vocabulary trimming rule, specifies whether certain words should remain + in the vocabulary, be trimmed away, or handled using the default (discard if word count < min_count). + Can be None (min_count will be used), or a callable that accepts parameters (word, count, min_count) and + returns either utils.RULE_DISCARD, utils.RULE_KEEP or utils.RULE_DEFAULT. + Note: The rule, if given, is only used to prune vocabulary during build_vocab() and is not stored as part + of the model. + """ super(Doc2Vec, self).__init__( size=size, alpha=alpha, window=window, min_count=min_count, max_vocab_size=max_vocab_size, @@ -569,7 +581,7 @@ def __init__(self, documents=None, size=300, alpha=0.025, window=8, min_count=5, self.docvecs = docvecs or DocvecsArray(docvecs_mapfile) self.comment = comment if documents is not None: - self.build_vocab(documents) + self.build_vocab(documents, trim_rule=trim_rule) self.train(documents) @property @@ -597,7 +609,7 @@ def reset_from(self, other_model): self.docvecs.borrow_from(other_model.docvecs) super(Doc2Vec, self).reset_from(other_model) - def scan_vocab(self, documents, progress_per=10000): + def scan_vocab(self, documents, progress_per=10000, trim_rule=None): logger.info("collecting all words and their counts") document_no = -1 total_words = 0 @@ -622,7 +634,7 @@ def scan_vocab(self, documents, progress_per=10000): total_words += len(document.words) if self.max_vocab_size and len(vocab) > self.max_vocab_size: - utils.prune_vocab(vocab, min_reduce) + utils.prune_vocab(vocab, min_reduce, trim_rule=trim_rule) min_reduce += 1 logger.info("collected %i word types and %i unique tags from a corpus of %i
examples and %i words", diff --git a/gensim/models/hdpmodel.py b/gensim/models/hdpmodel.py index b923a20c88..33b61e6e70 100755 --- a/gensim/models/hdpmodel.py +++ b/gensim/models/hdpmodel.py @@ -606,7 +606,7 @@ def show_topics(self, topics=10, topn=10, log=False, formatted=True): if log: logger.info(topic) else: - topic = [k, topic_terms] + topic = (k, topic_terms) shown.append(topic) return shown diff --git a/gensim/models/ldamodel.py b/gensim/models/ldamodel.py index a5dcaa42ef..009179e26f 100755 --- a/gensim/models/ldamodel.py +++ b/gensim/models/ldamodel.py @@ -308,7 +308,7 @@ def __init__(self, corpus=None, num_topics=100, id2word=None, # Initialize the variational distribution q(beta|lambda) self.state = LdaState(self.eta, (self.num_topics, self.num_terms)) self.state.sstats = numpy.random.gamma(100., 1. / 100., (self.num_topics, self.num_terms)) - self.sync_state() + self.expElogbeta = numpy.exp(dirichlet_expectation(self.state.sstats)) # if a training corpus was provided, start estimating the model right away if corpus is not None: @@ -688,7 +688,7 @@ def show_topics(self, num_topics=10, num_words=10, log=False, formatted=True): (10 words per topic, by default). The topics are returned as a list -- a list of strings if `formatted` is - True, or a list of (probability, word) 2-tuples if False. + True, or a list of `(word, probability)` 2-tuples if False. If `log` is True, also output this result to log. 
@@ -716,7 +716,7 @@ def show_topics(self, num_topics=10, num_words=10, log=False, formatted=True): else: topic = self.show_topic(i, topn=num_words) - shown.append(topic) + shown.append((i, topic)) if log: logger.info("topic #%i (%.3f): %s", i, self.alpha[i], topic) @@ -724,21 +724,30 @@ def show_topics(self, num_topics=10, num_words=10, log=False, formatted=True): def show_topic(self, topicid, topn=10): """ - Return a list of `(words_probability, word)` 2-tuples for the most probable + Return a list of `(word, probability)` 2-tuples for the most probable words in topic `topicid`. Only return 2-tuples for the topn most probable words (ignore the rest). + """ + return [(self.id2word[id], value) for id, value in self.get_topic_terms(topicid, topn)] + + def get_topic_terms(self, topicid, topn=10): + """ + Return a list of `(word_id, probability)` 2-tuples for the most + probable words in topic `topicid`. + + Only return 2-tuples for the topn most probable words (ignore the rest). + """ topic = self.state.get_lambda()[topicid] topic = topic / topic.sum() # normalize to probability distribution bestn = matutils.argsort(topic, topn, reverse=True) - beststr = [(topic[id], self.id2word[id]) for id in bestn] - return beststr + return [(id, topic[id]) for id in bestn] def print_topic(self, topicid, topn=10): """Return the result of `show_topic`, but formatted as a single string.""" - return ' + '.join(['%.3f*%s' % v for v in self.show_topic(topicid, topn)]) + return ' + '.join(['%.3f*%s' % (v, k) for k, v in self.show_topic(topicid, topn)]) def top_topics(self, corpus, num_words=20): """ @@ -788,11 +797,13 @@ def top_topics(self, corpus, num_words=20): # l_docs is v_l^(t) l_docs = doc_word_list[l] - # co_doc_frequency is D(v_m^(t), v_l^(t)) - co_doc_frequency = len(m_docs.intersection(l_docs)) + # make sure this word appears in some documents. 
+ if len(l_docs) > 0: + # co_doc_frequency is D(v_m^(t), v_l^(t)) + co_doc_frequency = len(m_docs.intersection(l_docs)) - # add to the coherence sum for these two words m, l - coherence += numpy.log((co_doc_frequency + 1.0) / len(l_docs)) + # add to the coherence sum for these two words m, l + coherence += numpy.log((co_doc_frequency + 1.0) / len(l_docs)) coherence_scores.append((str_topics[t], coherence)) diff --git a/gensim/models/lsimodel.py b/gensim/models/lsimodel.py index ac6de937d1..a11c8abfbd 100644 --- a/gensim/models/lsimodel.py +++ b/gensim/models/lsimodel.py @@ -63,7 +63,7 @@ from six.moves import xrange -logger = logging.getLogger('gensim.models.lsimodel') +logger = logging.getLogger(__name__) # accuracy defaults for the multi-pass stochastic algo @@ -474,7 +474,7 @@ def show_topic(self, topicno, topn=10): of the topic (both negative and positive). >>> lsimodel.show_topic(10, topn=5) - [(-0.340, "category"), (0.298, "$M$"), (0.183, "algebra"), (-0.174, "functor"), (-0.168, "operator")] + [("category", -0.340), ("$M$", 0.298), ("algebra", 0.183), ("functor", -0.174), ("operator", -0.168)] """ # size of the projection matrix can actually be smaller than `self.num_topics`, @@ -485,7 +485,7 @@ def show_topic(self, topicno, topn=10): c = numpy.asarray(self.projection.u.T[topicno, :]).flatten() norm = numpy.sqrt(numpy.sum(numpy.dot(c, c))) most = matutils.argsort(numpy.abs(c), topn, reverse=True) - return [(1.0 * c[val] / norm, self.id2word[val]) for val in most] + return [(self.id2word[val], 1.0 * c[val] / norm) for val in most] def print_topic(self, topicno, topn=10): """ @@ -495,7 +495,7 @@ def print_topic(self, topicno, topn=10): '-0.340 * "category" + 0.298 * "$M$" + 0.183 * "algebra" + -0.174 * "functor" + -0.168 * "operator"' """ - return ' + '.join(['%.3f*"%s"' % v for v in self.show_topic(topicno, topn)]) + return ' + '.join(['%.3f*"%s"' % (v, k) for k, v in self.show_topic(topicno, topn)]) def show_topics(self, num_topics=-1, num_words=10, 
log=False, formatted=True): """ @@ -503,7 +503,7 @@ def show_topics(self, num_topics=-1, num_words=10, log=False, formatted=True): For each topic, show `num_words` most significant words (10 words by default). The topics are returned as a list -- a list of strings if `formatted` is - True, or a list of (weight, word) 2-tuples if False. + True, or a list of `(word, probability)` 2-tuples if False. If `log` is True, also output this result to log. @@ -517,7 +517,7 @@ def show_topics(self, num_topics=-1, num_words=10, log=False, formatted=True): topic = self.print_topic(i, topn=num_words) else: topic = self.show_topic(i, topn=num_words) - shown.append(topic) + shown.append((i, topic)) if log: logger.info("topic #%i(%.3f): %s", i, self.projection.s[i], topic) return shown diff --git a/gensim/models/phrases.py b/gensim/models/phrases.py index f9415c8bc1..96cb9ce072 100644 --- a/gensim/models/phrases.py +++ b/gensim/models/phrases.py @@ -57,6 +57,8 @@ """ +import sys +import os import logging from collections import defaultdict @@ -121,14 +123,12 @@ def __init__(self, sentences=None, min_count=5, threshold=10.0, if sentences is not None: self.add_vocab(sentences) - def __str__(self): """Get short string representation of this phrase detector.""" return "%s<%i vocab, min_count=%s, threshold=%s, max_vocab_size=%s>" % ( self.__class__.__name__, len(self.vocab), self.min_count, self.threshold, self.max_vocab_size) - @staticmethod def learn_vocab(sentences, max_vocab_size, delimiter=b'_'): """Collect unigram/bigram counts from the `sentences` iterable.""" @@ -147,7 +147,7 @@ def learn_vocab(sentences, max_vocab_size, delimiter=b'_'): vocab[delimiter.join(bigram)] += 1 total_words += 1 - if sentence: # add last word skipped by previous loop + if sentence: # add last word skipped by previous loop word = sentence[-1] vocab[word] += 1 @@ -159,7 +159,6 @@ def learn_vocab(sentences, max_vocab_size, delimiter=b'_'): (len(vocab), total_words, sentence_no + 1)) return min_reduce, 
vocab - def add_vocab(self, sentences): """ Merge the collected counts `vocab` into this phrase detector. @@ -172,16 +171,15 @@ def add_vocab(self, sentences): # counts collected in previous learn_vocab runs. min_reduce, vocab = self.learn_vocab(sentences, self.max_vocab_size, self.delimiter) - logger.info("merging %i counts into %s" % (len(vocab), self)) + logger.info("merging %i counts into %s", len(vocab), self) self.min_reduce = max(self.min_reduce, min_reduce) for word, count in iteritems(vocab): self.vocab[word] += count if len(self.vocab) > self.max_vocab_size: - prune_vocab(self.vocab, self.min_reduce) + utils.prune_vocab(self.vocab, self.min_reduce) self.min_reduce += 1 - logger.info("merged %s" % self) - + logger.info("merged %s", self) def __getitem__(self, sentence): """ @@ -245,7 +243,6 @@ def __getitem__(self, sentence): if __name__ == '__main__': - import sys, os logging.basicConfig(format='%(asctime)s : %(threadName)s : %(levelname)s : %(message)s', level=logging.INFO) logging.info("running %s" % " ".join(sys.argv)) diff --git a/gensim/models/word2vec.py b/gensim/models/word2vec.py index 3763dc8024..aff6cf4371 100755 --- a/gensim/models/word2vec.py +++ b/gensim/models/word2vec.py @@ -76,7 +76,10 @@ from copy import deepcopy from collections import defaultdict import threading -import time +import itertools + +from gensim.utils import keep_vocab_item + try: from queue import Queue, Empty except ImportError: @@ -84,7 +87,7 @@ from numpy import exp, log, dot, zeros, outer, random, dtype, float32 as REAL,\ uint32, seterr, array, uint8, vstack, fromstring, sqrt, newaxis,\ - ndarray, empty, sum as np_sum, prod, ones + ndarray, empty, sum as np_sum, prod, ones, ascontiguousarray from gensim import utils, matutils # utility fnc for pickling, common scipy operations etc from six import iteritems, itervalues, string_types @@ -94,10 +97,12 @@ logger = logging.getLogger("gensim.models.word2vec") try: - from gensim.models.word2vec_inner import train_sentence_sg, 
train_batch_sg, train_sentence_cbow, FAST_VERSION + from gensim.models.word2vec_inner import train_sentence_sg, train_batch_sg, train_sentence_cbow + from gensim.models.word2vec_inner import FAST_VERSION, MAX_WORDS_IN_BATCH, MAX_BATCH_SENTENCES except ImportError: # failed... fall back to plain numpy (20-80x slower training than the above) FAST_VERSION = -1 + MAX_WORDS_IN_BATCH, MAX_BATCH_SENTENCES = 100000, 100 def train_sentence_sg(model, sentence, alpha, work=None): """ @@ -338,7 +343,8 @@ class Word2Vec(utils.SaveLoad): def __init__( self, sentences=None, size=100, alpha=0.025, window=5, min_count=5, max_vocab_size=None, sample=0, seed=1, workers=1, min_alpha=0.0001, - sg=1, hs=1, negative=0, cbow_mean=0, hashfxn=hash, iter=1, null_word=0, batch=False): # TODO: remove "batch" input variable when done working on batching. + sg=1, hs=1, negative=0, cbow_mean=0, hashfxn=hash, iter=1, null_word=0, + trim_rule=None, sorted_vocab=1, batch=False): # FIXME: remove "batch" input variable when done working on batching. """ Initialize the model from an iterable of `sentences`. Each sentence is a list of words (unicode strings) that will be used for training. @@ -387,6 +393,16 @@ def __init__( `iter` = number of iterations (epochs) over the corpus. + `trim_rule` = vocabulary trimming rule, specifies whether certain words should remain + in the vocabulary, be trimmed away, or handled using the default (discard if word count < min_count). + Can be None (min_count will be used), or a callable that accepts parameters (word, count, min_count) and + returns either utils.RULE_DISCARD, utils.RULE_KEEP or utils.RULE_DEFAULT. + Note: The rule, if given, is only used to prune vocabulary during build_vocab() and is not stored as part + of the model. + + `sorted_vocab` = if 1 (default), sort the vocabulary by descending frequency before + assigning word indexes.
+ """ self.vocab = {} # mapping from a word (string) to a Vocab object self.index2word = [] # map from a word's matrix index (int) to word (string) @@ -414,10 +430,12 @@ def __init__( self.train_count = 0 self.total_train_time = 0 self.batch = batch + self.sorted_vocab = sorted_vocab + if sentences is not None: if isinstance(sentences, GeneratorType): raise TypeError("You can't pass a generator as the sentences argument. Try an iterator.") - self.build_vocab(sentences) + self.build_vocab(sentences, trim_rule=trim_rule) self.train(sentences) def make_cum_table(self, power=0.75, domain=2**31 - 1): @@ -475,17 +493,17 @@ def create_binary_tree(self): logger.info("built huffman tree with maximum node depth %i", max_depth) - def build_vocab(self, sentences, keep_raw_vocab=False): + def build_vocab(self, sentences, keep_raw_vocab=False, trim_rule=None): """ Build vocabulary from a sequence of sentences (can be a once-only generator stream). Each sentence must be a list of unicode strings. """ - self.scan_vocab(sentences) # initial survey - self.scale_vocab(keep_raw_vocab) # trim by min_count & precalculate downsampling + self.scan_vocab(sentences, trim_rule=trim_rule) # initial survey + self.scale_vocab(keep_raw_vocab=keep_raw_vocab, trim_rule=trim_rule) # trim by min_count & precalculate downsampling self.finalize_vocab() # build tables & arrays - def scan_vocab(self, sentences, progress_per=10000): + def scan_vocab(self, sentences, progress_per=10000, trim_rule=None): """Do an initial scan of all words appearing in sentences.""" logger.info("collecting all words and their counts") sentence_no = -1 @@ -500,7 +518,7 @@ def scan_vocab(self, sentences, progress_per=10000): vocab[word] += 1 if self.max_vocab_size and len(vocab) > self.max_vocab_size: - total_words += utils.prune_vocab(vocab, min_reduce) + total_words += utils.prune_vocab(vocab, min_reduce, trim_rule=trim_rule) min_reduce += 1 total_words += sum(itervalues(vocab)) @@ -509,7 +527,7 @@ def scan_vocab(self, 
sentences, progress_per=10000): self.corpus_count = sentence_no + 1 self.raw_vocab = vocab - def scale_vocab(self, min_count=None, sample=None, dry_run=False, keep_raw_vocab=False): + def scale_vocab(self, min_count=None, sample=None, dry_run=False, keep_raw_vocab=False, trim_rule=None): """ Apply vocabulary settings for `min_count` (discarding less-frequent words) and `sample` (controlling the downsampling of more-frequent words). @@ -536,7 +554,7 @@ def scale_vocab(self, min_count=None, sample=None, dry_run=False, keep_raw_vocab drop_unique, drop_total, retain_total, original_total = 0, 0, 0, 0 retain_words = [] for word, v in iteritems(self.raw_vocab): - if v >= min_count: + if keep_vocab_item(word, v, min_count, trim_rule=trim_rule): retain_words.append(word) retain_total += v original_total += v @@ -597,6 +615,8 @@ def finalize_vocab(self): """Build tables and model weights based on final vocabulary settings.""" if not self.index2word: self.scale_vocab() + if self.sorted_vocab: + self.sort_vocab() if self.hs: # add info about each word's Huffman encoding self.create_binary_tree() @@ -613,6 +633,14 @@ def finalize_vocab(self): # set initial input/projection and hidden weights self.reset_weights() + def sort_vocab(self): + """Sort the vocabulary so the most frequent words have the lowest indexes.""" + if hasattr(self, 'syn0'): + raise RuntimeError("must sort before initializing vectors/weights") + self.index2word.sort(key=lambda word: self.vocab[word].count, reverse=True) + for i, word in enumerate(self.index2word): + self.vocab[word].index = i + def reset_from(self, other_model): """ Borrow shareable pre-built structures (like vocab) from the other_model. 
Useful @@ -624,15 +652,16 @@ def reset_from(self, other_model): self.corpus_count = other_model.corpus_count self.reset_weights() - def _do_train_job(self, job, alpha, inits, sentence_indeces): + def _do_train_job(self, sentences, alpha, inits): work, neu1 = inits - tally = 0 - raw_tally = 0 - if not FAST_VERSION == -1 and self.sg and self.batch: # TODO: do for cbow also. - tally += train_batch_sg(self, job, alpha, sentence_indeces, work) - raw_tally += len(job) + tally, raw_tally = 0, 0 + if self.batch: + assert FAST_VERSION > -1, "FIXME: python-only code path" + assert self.sg, "FIXME: cbow also" + tally += train_batch_sg(self, sentences, alpha, work) + raw_tally += len(sentences) else: - for sentence in job: + for sentence in sentences: if self.sg: tally += train_sentence_sg(self, sentence, alpha, work) else: @@ -643,7 +672,8 @@ def _do_train_job(self, job, alpha, inits, sentence_indeces): def _raw_word_count(self, items): return sum(len(item) for item in items) - def train(self, sentences, total_words=None, word_count=0, chunksize=100, total_examples=None, queue_factor=2, report_delay=1): + def train(self, sentences, total_words=None, word_count=0, chunksize=100, + total_examples=None, queue_factor=2, report_delay=1.0): """ Update the model's neural weights from a sequence of sentences (can be a once-only generator stream). For Word2Vec, each sentence must be a list of unicode strings. (Subclasses may accept other examples.) 
@@ -677,7 +707,7 @@ def train(self, sentences, total_words=None, word_count=0, chunksize=100, total_ if total_words is None and total_examples is None: if self.corpus_count: total_examples = self.corpus_count - logger.info("expecting %i examples, matching count from corpus used for vocabulary survey", total_examples) + logger.info("expecting %i sentences, matching count from corpus used for vocabulary survey", total_examples) else: raise ValueError("you must provide either total_words or total_examples, to enable alpha and progress calculations") @@ -692,154 +722,124 @@ def worker_init(): return (work, neu1) def worker_one_job(job, inits): - items, alpha, sentence_indeces = job - if items is None: # signal to finish - return False - # train & return tally - tally, raw_tally = self._do_train_job(items, alpha, inits, sentence_indeces) - progress_queue.put((len(sentence_indeces) - 1, tally, raw_tally)) # report progress - return True + sentences, alpha = job + tally, raw_tally = self._do_train_job(sentences, alpha, inits) + progress_queue.put((len(sentences), tally, raw_tally)) # report back progress def worker_loop(): """Train the model, lifting lists of sentences from the jobs queue.""" init = worker_init() while True: job = job_queue.get() - if not worker_one_job(job, init): + if job is None: break - - start, next_report = default_timer(), 1.0 - - # buffer ahead only a limited number of jobs.. 
this is the reason we can't simply use ThreadPool :( - if self.workers > 0: - job_queue = Queue(maxsize=queue_factor * self.workers) - else: - job_queue = FakeJobQueue(worker_init, worker_one_job) - progress_queue = Queue(maxsize=(queue_factor + 1) * self.workers) - - workers = [threading.Thread(target=worker_loop) for _ in xrange(self.workers)] - for thread in workers: - thread.daemon = True # make interrupting the process with ctrl+c easier - thread.start() - - pushed_words = 0 - pushed_examples = 0 - example_count = 0 - trained_word_count = 0 - raw_word_count = word_count - push_done = False - done_jobs = 0 - next_alpha = self.alpha - - job_batch = [] - num_sentences = 0 - batch_size = 0 - sentence_indeces = [0] - job_no = 0 - MAX_WORDS_IN_BATCH = 10000 # NOTE: should be same as MAX_SENTENCE_LEN in word2vec_inner.pyx. - MAX_NUM_SENTENCES = 1000 # TODO: should be in word2vec_inner.pyx as well. TODO: consider proper value. - # fill jobs queue with (sentence, alpha) job tuples - job_source = enumerate(sentences) - while True: # TODO: use for instead. - try: - sent_idx, sent = job_source.next() - if batch_size + len(sent) < MAX_WORDS_IN_BATCH and num_sentences < MAX_NUM_SENTENCES: - # Append sentence to job batch and proceed. - job_batch.extend(sent) - batch_size += len(sent) - sentence_indeces.append(sentence_indeces[-1] + batch_size) - num_sentences += 1 + worker_one_job(job, init) + + def job_producer(): + """Fill jobs queue using the input sentences iterator.""" + job_batch, batch_size = [], 0 + job_no, pushed_words, pushed_examples = 0, 0, 0 + next_alpha = self.alpha + self.jobs_finished = False + + for sent_idx, sentence in enumerate(sentences): + # clip sentences that are too large for the C structures + sentence = sentence[: MAX_WORDS_IN_BATCH] + + # can we fit this sentence into the existing job batch? 
+ if batch_size + len(sentence) <= MAX_WORDS_IN_BATCH and len(job_batch) + 1 <= MAX_BATCH_SENTENCES: + # yes => add it to the current job + job_batch.append(sentence) + batch_size += len(sentence) else: - # Submit job to queue. + # no => submit the existing job logger.debug("putting job #%i in the queue at alpha %.05f", job_no, next_alpha) - job_queue.put((job_batch, next_alpha, sentence_indeces)) - # update the learning rate before every next job - if self.min_alpha < next_alpha: - if total_examples: - # examples-based decay - pushed_examples += len(job_batch) - next_alpha = self.alpha - (self.alpha - self.min_alpha) * (pushed_examples / total_examples) - else: - # words-based decay - pushed_words += self._raw_word_count(job_batch) - next_alpha = self.alpha - (self.alpha - self.min_alpha) * (pushed_words / total_words) - next_alpha = max(next_alpha, self.min_alpha) + job_queue.put((job_batch, next_alpha)) job_no += 1 - num_sentences = 0 - if len(sent) < MAX_WORDS_IN_BATCH: - job_batch = sent - batch_size = len(sent) - else: - sent_slice = sent[:MAX_WORDS_IN_BATCH] - job_batch = sent_slice - batch_size = len(sent_slice) - sentence_indeces = [0, batch_size] - num_sentences += 1 - except StopIteration: - # No more sentences left. - if job_batch: # If there are still items left in job, submit them. - ## Submit job to queue. 
- logger.debug("putting job #%i in the queue at alpha %.05f", job_no, next_alpha) - job_queue.put((job_batch, next_alpha, sentence_indeces)) - # update the learning rate before every next job + + # update the learning rate for the next job if self.min_alpha < next_alpha: if total_examples: # examples-based decay pushed_examples += len(job_batch) - next_alpha = self.alpha - (self.alpha - self.min_alpha) * (pushed_examples / total_examples) + progress = 1.0 * pushed_examples / total_examples else: # words-based decay pushed_words += self._raw_word_count(job_batch) - next_alpha = self.alpha - (self.alpha - self.min_alpha) * (pushed_words / total_words) - next_alpha = max(next_alpha, self.min_alpha) - job_batch = [] - batch_size = 0 - job_no += 1 + progress = 1.0 * pushed_words / total_words + next_alpha = self.alpha - (self.alpha - self.min_alpha) * progress + next_alpha = max(self.min_alpha, next_alpha) - logger.info( - "reached end of input; waiting to finish %i outstanding jobs", - job_no - done_jobs) - for _ in xrange(self.workers): - job_queue.put((None, 0, [0])) # give the workers heads up that they can finish -- no more work! - push_done = True + # add the sentence that didn't fit as the first item of a new job + job_batch, batch_size = [sentence], len(sentence) + # add the last job too (may be significantly smaller than MAX_WORDS_IN_BATCH / MAX_BATCH_SENTENCES) + if job_batch: + logger.debug("putting job #%i in the queue at alpha %.05f", job_no, next_alpha) + job_queue.put((job_batch, next_alpha)) + job_no += 1 - if job_no == -1 and self.train_count == 0: + logger.info("reached end of input; waiting to finish %i outstanding jobs", utils.qsize(job_queue)) + + if job_no == 0 and self.train_count == 0: logger.warning( - "train() called with empty iterator (if not intended, " + "train() called with an empty iterator (if not intended, " "be sure to provide a corpus that offers restartable " - "iteration)." + "iteration = an iterable)." 
) - try: - while done_jobs < job_no or not push_done: - examples, trained_words, raw_words = progress_queue.get(push_done) # only block after all jobs pushed - example_count += examples - trained_word_count += trained_words # only words in vocab & sampled - raw_word_count += raw_words - done_jobs += 1 - elapsed = default_timer() - start - if elapsed >= next_report: - if total_examples: - # examples-based progress % - logger.info( - "PROGRESS: at %.2f%% examples, %.0f words/s", - 100.0 * example_count / total_examples, trained_word_count / elapsed) - else: - # words-based progress % - logger.info( - "PROGRESS: at %.2f%% words, %.0f words/s", - 100.0 * raw_word_count / total_words, trained_word_count / elapsed) - next_report = elapsed + report_delay # don't flood log, wait report_delay seconds - else: - # loop ended by job count; really done - break - except Empty: - pass # already out of loop; continue to next push + # give the workers heads up that they can finish -- no more work! + for _ in xrange(self.workers): + job_queue.put(None) + self.jobs_finished = True + + # buffer ahead only a limited number of jobs.. 
this is the reason we can't simply use ThreadPool :( + if self.workers > 0: + job_queue = Queue(maxsize=queue_factor * self.workers) + else: + job_queue = FakeJobQueue(worker_init, worker_one_job) + progress_queue = Queue(maxsize=(queue_factor + 1) * self.workers) + + workers = [threading.Thread(target=worker_loop) for _ in xrange(self.workers)] + workers.append(threading.Thread(target=job_producer)) + for thread in workers: + thread.daemon = True # make interrupting the process with ctrl+c easier + thread.start() + + example_count, trained_word_count, raw_word_count = 0, 0, word_count + start, next_report = default_timer(), 1.0 + + while not (self.jobs_finished and job_queue.empty()): + examples, trained_words, raw_words = progress_queue.get() # blocks if workers too slow + + # update progress stats + example_count += examples + trained_word_count += trained_words # only words in vocab & sampled + raw_word_count += raw_words + + # log progress once every report_delay seconds + elapsed = default_timer() - start + if elapsed >= next_report: + if total_examples: + # examples-based progress % + logger.info( + "PROGRESS: at %.2f%% examples, %.0f words/s, in_qsize %i, out_qsize %i", + 100.0 * example_count / total_examples, trained_word_count / elapsed, + utils.qsize(job_queue), utils.qsize(progress_queue)) + else: + # words-based progress % + logger.info( + "PROGRESS: at %.2f%% words, %.0f words/s, in_qsize %i, out_qsize %i", + 100.0 * raw_word_count / total_words, trained_word_count / elapsed, + utils.qsize(job_queue), utils.qsize(progress_queue)) + next_report = elapsed + report_delay + + # all done; report the final stats elapsed = default_timer() - start logger.info( "training on %i raw words took %.1fs, %.0f trained words/s", raw_word_count, elapsed, trained_word_count / elapsed if elapsed else 0.0) + # check that the input corpus hasn't changed during iteration if total_examples and total_examples != example_count: logger.warn("supplied example count (%i) did not 
equal expected count (%i)", example_count, total_examples) if total_words and total_words != raw_word_count: @@ -998,7 +998,7 @@ def save_word2vec_format(self, fname, fvocab=None, binary=False): """ if fvocab is not None: - logger.info("Storing vocabulary in %s" % (fvocab)) + logger.info("storing vocabulary in %s" % (fvocab)) with utils.smart_open(fvocab, 'wb') as vout: for word, vocab in sorted(iteritems(self.vocab), key=lambda item: -item[1].count): vout.write(utils.to_utf8("%s %s\n" % (word, vocab.count))) @@ -1015,7 +1015,7 @@ def save_word2vec_format(self, fname, fvocab=None, binary=False): fout.write(utils.to_utf8("%s %s\n" % (word, ' '.join("%f" % val for val in row)))) @classmethod - def load_word2vec_format(cls, fname, fvocab=None, binary=False, norm_only=True, encoding='utf8'): + def load_word2vec_format(cls, fname, fvocab=None, binary=False, norm_only=True, encoding='utf8', unicode_errors='strict'): """ Load the input-hidden weight matrix from the original C word2vec-tool format. 
@@ -1034,19 +1034,38 @@ def load_word2vec_format(cls, fname, fvocab=None, binary=False, norm_only=True, """ counts = None if fvocab is not None: - logger.info("loading word counts from %s" % (fvocab)) + logger.info("loading word counts from %s", fvocab) counts = {} with utils.smart_open(fvocab) as fin: for line in fin: word, count = utils.to_unicode(line).strip().split() counts[word] = int(count) - logger.info("loading projection weights from %s" % (fname)) + logger.info("loading projection weights from %s", fname) with utils.smart_open(fname) as fin: header = utils.to_unicode(fin.readline(), encoding=encoding) vocab_size, vector_size = map(int, header.split()) # throws for invalid file format - result = Word2Vec(size=vector_size) + result = cls(size=vector_size) result.syn0 = zeros((vocab_size, vector_size), dtype=REAL) + + def add_word(word, weights): + word_id = len(result.vocab) + if word in result.vocab: + logger.warning("duplicate word '%s' in %s, ignoring all but first", word, fname) + return + if counts is None: + # most common scenario: no vocab file given. just make up some bogus counts, in descending order + result.vocab[word] = Vocab(index=word_id, count=vocab_size - word_id) + elif word in counts: + # use count from the vocab file + result.vocab[word] = Vocab(index=word_id, count=counts[word]) + else: + # vocab file given, but word is missing -- set count to None (TODO: or raise?) 
+ logger.warning("vocabulary file is incomplete: '%s' is missing", word) + result.vocab[word] = Vocab(index=word_id, count=None) + result.syn0[word_id] = weights + result.index2word.append(word) + if binary: binary_len = dtype(REAL).itemsize * vector_size for line_no in xrange(vocab_size): @@ -1057,38 +1076,30 @@ def load_word2vec_format(cls, fname, fvocab=None, binary=False, norm_only=True, if ch == b' ': break if ch != b'\n': # ignore newlines in front of words (some binary files have) - word.append(ch) - word = utils.to_unicode(b''.join(word), encoding=encoding) - if counts is None: - result.vocab[word] = Vocab(index=line_no, count=vocab_size - line_no) - elif word in counts: - result.vocab[word] = Vocab(index=line_no, count=counts[word]) - else: - logger.warning("vocabulary file is incomplete") - result.vocab[word] = Vocab(index=line_no, count=None) - result.index2word.append(word) - result.syn0[line_no] = fromstring(fin.read(binary_len), dtype=REAL) + word.append(ch) + word = utils.to_unicode(b''.join(word), encoding=encoding, errors=unicode_errors) + weights = fromstring(fin.read(binary_len), dtype=REAL) + add_word(word, weights) else: for line_no, line in enumerate(fin): - parts = utils.to_unicode(line.rstrip(), encoding=encoding).split(" ") + parts = utils.to_unicode(line.rstrip(), encoding=encoding, errors=unicode_errors).split(" ") if len(parts) != vector_size + 1: raise ValueError("invalid vector on line %s (is this really the text format?)" % (line_no)) word, weights = parts[0], list(map(REAL, parts[1:])) - if counts is None: - result.vocab[word] = Vocab(index=line_no, count=vocab_size - line_no) - elif word in counts: - result.vocab[word] = Vocab(index=line_no, count=counts[word]) - else: - logger.warning("vocabulary file is incomplete") - result.vocab[word] = Vocab(index=line_no, count=None) - result.index2word.append(word) - result.syn0[line_no] = weights + add_word(word, weights) + if result.syn0.shape[0] != len(result.vocab): + logger.info( + "duplicate words 
detected, shrinking matrix size from %i to %i", + result.syn0.shape[0], len(result.vocab) + ) + result.syn0 = ascontiguousarray(result.syn0[: len(result.vocab)]) + assert (len(result.vocab), result.vector_size) == result.syn0.shape + logger.info("loaded %s matrix from %s" % (result.syn0.shape, fname)) result.init_sims(norm_only) return result - def intersect_word2vec_format(self, fname, binary=False, encoding='utf8'): + def intersect_word2vec_format(self, fname, binary=False, encoding='utf8', unicode_errors='strict'): """ Merge the input-hidden weight matrix from the original C word2vec-tool format given, where it intersects with the current vocabulary. (No words are added to the @@ -1116,7 +1127,7 @@ def intersect_word2vec_format(self, fname, binary=False, encoding='utf8'): break if ch != b'\n': # ignore newlines in front of words (some binary files have) word.append(ch) - word = utils.to_unicode(b''.join(word), encoding=encoding) + word = utils.to_unicode(b''.join(word), encoding=encoding, errors=unicode_errors) weights = fromstring(fin.read(binary_len), dtype=REAL) if word in self.vocab: overlap_count += 1 @@ -1124,7 +1135,7 @@ def intersect_word2vec_format(self, fname, binary=False, encoding='utf8'): self.syn0_lockf[self.vocab[word].index] = 0.0 # lock it else: for line_no, line in enumerate(fin): - parts = utils.to_unicode(line.rstrip(), encoding=encoding).split(" ") + parts = utils.to_unicode(line.rstrip(), encoding=encoding, errors=unicode_errors).split(" ") if len(parts) != vector_size + 1: raise ValueError("invalid vector on line %s (is this really the text format?)" % (line_no)) word, weights = parts[0], list(map(REAL, parts[1:])) @@ -1133,7 +1144,7 @@ def intersect_word2vec_format(self, fname, binary=False, encoding='utf8'): self.syn0[self.vocab[word].index] = weights logger.info("merged %d vectors into %s matrix from %s" % (overlap_count, self.syn0.shape, fname)) - def most_similar(self, positive=[], negative=[], topn=10): + def most_similar(self, 
positive=[], negative=[], topn=10, restrict_vocab=None): """ Find the top-N most similar words. Positive words contribute positively towards the similarity, negative words negatively. @@ -1145,6 +1156,11 @@ def most_similar(self, positive=[], negative=[], topn=10): If topn is False, most_similar returns the vector of similarity scores. + `restrict_vocab` is an optional integer which limits the range of vectors which + are searched for most-similar values. For example, restrict_vocab=10000 would + only check the first 10000 word vectors in the vocabulary order. (This may be + meaningful if you've sorted the vocabulary by descending frequency.) + Example:: >>> trained_model.most_similar(positive=['woman', 'king'], negative=['man']) @@ -1181,7 +1197,8 @@ def most_similar(self, positive=[], negative=[], topn=10): raise ValueError("cannot compute similarity with no input") mean = matutils.unitvec(array(mean).mean(axis=0)).astype(REAL) - dists = dot(self.syn0norm, mean) + limited = self.syn0norm if restrict_vocab is None else self.syn0norm[:restrict_vocab] + dists = dot(limited, mean) if not topn: return dists best = matutils.argsort(dists, topn=topn + len(all_words), reverse=True) @@ -1422,7 +1439,7 @@ def accuracy(self, questions, restrict_vocab=30000, most_similar=most_similar): ignore = set(self.vocab[v].index for v in [a, b, c]) # indexes of words to ignore predicted = None # find the most likely prediction, ignoring OOV words and input words - sims = most_similar(self, positive=[b, c], negative=[a], topn=False) + sims = most_similar(self, positive=[b, c], negative=[a], topn=False, restrict_vocab=restrict_vocab) for index in matutils.argsort(sims, reverse=True): if index in ok_index and index not in ignore: predicted = self.index2word[index] @@ -1463,7 +1480,7 @@ def load(cls, *args, **kwargs): # update older models if hasattr(model, 'table'): delattr(model, 'table') # discard in favor of cum_table - if model.negative: + if model.negative and hasattr(model, 
'index2word'): model.make_cum_table() # rebuild cum_table from vocabulary if not hasattr(model, 'corpus_count'): model.corpus_count = None @@ -1473,7 +1490,7 @@ def load(cls, *args, **kwargs): else: v.sample_int = int(round(v.sample_probability * 2**32)) del v.sample_probability - if not hasattr(model, 'syn0_lockf'): + if not hasattr(model, 'syn0_lockf') and hasattr(model, 'syn0'): model.syn0_lockf = ones(len(model.syn0), dtype=REAL) if not hasattr(model, 'random'): model.random = random.RandomState(model.seed) @@ -1543,10 +1560,14 @@ def __iter__(self): class LineSentence(object): - """Simple format: one sentence = one line; words already preprocessed and separated by whitespace.""" - def __init__(self, source, max_sentence_length=10000): + """ + Simple format: one sentence = one line; words already preprocessed and separated by whitespace. + """ + + def __init__(self, source, max_sentence_length=10000, limit=None): """ - `source` can be either a string or a file object. + `source` can be either a string or a file object. Clip the file to the first + `limit` lines (or not clipped if limit is None, the default). 
Example:: @@ -1560,6 +1581,7 @@ def __init__(self, source, max_sentence_length=10000): """ self.source = source self.max_sentence_length = max_sentence_length + self.limit = limit def __iter__(self): """Iterate through the lines in the source.""" @@ -1567,20 +1589,20 @@ def __iter__(self): # Assume it is a file-like object and try treating it as such # Things that don't have seek will trigger an exception self.source.seek(0) - for line in self.source: + for line in itertools.islice(self.source, self.limit): line = utils.to_unicode(line).split() i = 0 while i < len(line): - yield line[i:(i + self.max_sentence_length)] + yield line[i : i + self.max_sentence_length] i += self.max_sentence_length except AttributeError: # If it didn't work like a file, use it as a string filename with utils.smart_open(self.source) as fin: - for line in fin: + for line in itertools.islice(fin, self.limit): line = utils.to_unicode(line).split() i = 0 while i < len(line): - yield line[i:(i + self.max_sentence_length)] + yield line[i : i + self.max_sentence_length] i += self.max_sentence_length diff --git a/gensim/models/word2vec_inner.c b/gensim/models/word2vec_inner.c index 56e672448b..d46a5be47a 100644 --- a/gensim/models/word2vec_inner.c +++ b/gensim/models/word2vec_inner.c @@ -1,13 +1,25 @@ -/* Generated by Cython 0.23.2 */ +/* Generated by Cython 0.22.1 */ #define PY_SSIZE_T_CLEAN +#ifndef CYTHON_USE_PYLONG_INTERNALS +#ifdef PYLONG_BITS_IN_DIGIT +#define CYTHON_USE_PYLONG_INTERNALS 0 +#else +#include "pyconfig.h" +#ifdef PYLONG_BITS_IN_DIGIT +#define CYTHON_USE_PYLONG_INTERNALS 1 +#else +#define CYTHON_USE_PYLONG_INTERNALS 0 +#endif +#endif +#endif #include "Python.h" #ifndef Py_PYTHON_H #error Python headers needed to compile C extensions, please install development version of Python. #elif PY_VERSION_HEX < 0x02060000 || (0x03000000 <= PY_VERSION_HEX && PY_VERSION_HEX < 0x03020000) #error Cython requires Python 2.6+ or Python 3.2+. 
#else -#define CYTHON_ABI "0_23_2" +#define CYTHON_ABI "0_22_1" #include #ifndef offsetof #define offsetof(type, member) ( (size_t) & ((type*)0) -> member ) @@ -42,9 +54,6 @@ #define CYTHON_COMPILING_IN_PYPY 0 #define CYTHON_COMPILING_IN_CPYTHON 1 #endif -#if !defined(CYTHON_USE_PYLONG_INTERNALS) && CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x02070000 -#define CYTHON_USE_PYLONG_INTERNALS 1 -#endif #if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX < 0x02070600 && !defined(Py_OptimizeFlag) #define Py_OptimizeFlag 0 #endif @@ -52,12 +61,12 @@ #define CYTHON_FORMAT_SSIZE_T "z" #if PY_MAJOR_VERSION < 3 #define __Pyx_BUILTIN_MODULE_NAME "__builtin__" - #define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\ + #define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) \ PyCode_New(a+k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) #define __Pyx_DefaultClassType PyClass_Type #else #define __Pyx_BUILTIN_MODULE_NAME "builtins" - #define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\ + #define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) \ PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) #define __Pyx_DefaultClassType PyType_Type #endif @@ -75,7 +84,7 @@ #endif #if PY_VERSION_HEX > 0x03030000 && defined(PyUnicode_KIND) #define CYTHON_PEP393_ENABLED 1 - #define __Pyx_PyUnicode_READY(op) (likely(PyUnicode_IS_READY(op)) ?\ + #define __Pyx_PyUnicode_READY(op) (likely(PyUnicode_IS_READY(op)) ? 
\ 0 : _PyUnicode_Ready((PyObject *)(op))) #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_LENGTH(u) #define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_READ_CHAR(u, i) @@ -94,10 +103,12 @@ #if CYTHON_COMPILING_IN_PYPY #define __Pyx_PyUnicode_Concat(a, b) PyNumber_Add(a, b) #define __Pyx_PyUnicode_ConcatSafe(a, b) PyNumber_Add(a, b) + #define __Pyx_PyFrozenSet_Size(s) PyObject_Size(s) #else #define __Pyx_PyUnicode_Concat(a, b) PyUnicode_Concat(a, b) - #define __Pyx_PyUnicode_ConcatSafe(a, b) ((unlikely((a) == Py_None) || unlikely((b) == Py_None)) ?\ + #define __Pyx_PyUnicode_ConcatSafe(a, b) ((unlikely((a) == Py_None) || unlikely((b) == Py_None)) ? \ PyNumber_Add(a, b) : __Pyx_PyUnicode_Concat(a, b)) + #define __Pyx_PyFrozenSet_Size(s) PySet_Size(s) #endif #if CYTHON_COMPILING_IN_PYPY && !defined(PyUnicode_Contains) #define PyUnicode_Contains(u, s) PySequence_Contains(u, s) @@ -165,32 +176,6 @@ #else #define __Pyx_PyMethod_New(func, self, klass) PyMethod_New(func, self, klass) #endif -#if PY_VERSION_HEX >= 0x030500B1 -#define __Pyx_PyAsyncMethodsStruct PyAsyncMethods -#define __Pyx_PyType_AsAsync(obj) (Py_TYPE(obj)->tp_as_async) -#elif CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 -typedef struct { - unaryfunc am_await; - unaryfunc am_aiter; - unaryfunc am_anext; -} __Pyx_PyAsyncMethodsStruct; -#define __Pyx_PyType_AsAsync(obj) ((__Pyx_PyAsyncMethodsStruct*) (Py_TYPE(obj)->tp_reserved)) -#else -#define __Pyx_PyType_AsAsync(obj) NULL -#endif -#ifndef CYTHON_RESTRICT - #if defined(__GNUC__) - #define CYTHON_RESTRICT __restrict__ - #elif defined(_MSC_VER) && _MSC_VER >= 1400 - #define CYTHON_RESTRICT __restrict - #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L - #define CYTHON_RESTRICT restrict - #else - #define CYTHON_RESTRICT - #endif -#endif -#define __Pyx_void_to_None(void_result) (void_result, Py_INCREF(Py_None), Py_None) - #ifndef CYTHON_INLINE #if defined(__GNUC__) #define CYTHON_INLINE __inline__ @@ -202,20 +187,46 @@ typedef struct { 
#define CYTHON_INLINE #endif #endif - -#if defined(WIN32) || defined(MS_WINDOWS) - #define _USE_MATH_DEFINES +#ifndef CYTHON_RESTRICT + #if defined(__GNUC__) + #define CYTHON_RESTRICT __restrict__ + #elif defined(_MSC_VER) && _MSC_VER >= 1400 + #define CYTHON_RESTRICT __restrict + #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + #define CYTHON_RESTRICT restrict + #else + #define CYTHON_RESTRICT + #endif #endif -#include #ifdef NAN #define __PYX_NAN() ((float) NAN) #else static CYTHON_INLINE float __PYX_NAN() { + /* Initialize NaN. The sign is irrelevant, an exponent with all bits 1 and + a nonzero mantissa means NaN. If the first bit in the mantissa is 1, it is + a quiet NaN. */ float value; memset(&value, 0xFF, sizeof(value)); return value; } #endif +#define __Pyx_void_to_None(void_result) (void_result, Py_INCREF(Py_None), Py_None) +#ifdef __cplusplus +template +void __Pyx_call_destructor(T* x) { + x->~T(); +} +template +class __Pyx_FakeReference { + public: + __Pyx_FakeReference() : ptr(NULL) { } + __Pyx_FakeReference(T& ref) : ptr(&ref) { } + T *operator->() { return ptr; } + operator T&() { return *ptr; } + private: + T *ptr; +}; +#endif #if PY_MAJOR_VERSION >= 3 @@ -234,6 +245,10 @@ static CYTHON_INLINE float __PYX_NAN() { #endif #endif +#if defined(WIN32) || defined(MS_WINDOWS) +#define _USE_MATH_DEFINES +#endif +#include #define __PYX_HAVE__gensim__models__word2vec_inner #define __PYX_HAVE_API__gensim__models__word2vec_inner #include "voidptr.h" @@ -279,34 +294,16 @@ typedef struct {PyObject **p; char *s; const Py_ssize_t n; const char* encoding; #define __PYX_DEFAULT_STRING_ENCODING "" #define __Pyx_PyObject_FromString __Pyx_PyBytes_FromString #define __Pyx_PyObject_FromStringAndSize __Pyx_PyBytes_FromStringAndSize -#define __Pyx_uchar_cast(c) ((unsigned char)c) -#define __Pyx_long_cast(x) ((long)x) -#define __Pyx_fits_Py_ssize_t(v, type, is_signed) (\ - (sizeof(type) < sizeof(Py_ssize_t)) ||\ - (sizeof(type) > sizeof(Py_ssize_t) &&\ - 
likely(v < (type)PY_SSIZE_T_MAX ||\ - v == (type)PY_SSIZE_T_MAX) &&\ - (!is_signed || likely(v > (type)PY_SSIZE_T_MIN ||\ - v == (type)PY_SSIZE_T_MIN))) ||\ - (sizeof(type) == sizeof(Py_ssize_t) &&\ - (is_signed || likely(v < (type)PY_SSIZE_T_MAX ||\ +#define __Pyx_fits_Py_ssize_t(v, type, is_signed) ( \ + (sizeof(type) < sizeof(Py_ssize_t)) || \ + (sizeof(type) > sizeof(Py_ssize_t) && \ + likely(v < (type)PY_SSIZE_T_MAX || \ + v == (type)PY_SSIZE_T_MAX) && \ + (!is_signed || likely(v > (type)PY_SSIZE_T_MIN || \ + v == (type)PY_SSIZE_T_MIN))) || \ + (sizeof(type) == sizeof(Py_ssize_t) && \ + (is_signed || likely(v < (type)PY_SSIZE_T_MAX || \ v == (type)PY_SSIZE_T_MAX))) ) -#if defined (__cplusplus) && __cplusplus >= 201103L - #include - #define __Pyx_sst_abs(value) std::abs(value) -#elif SIZEOF_INT >= SIZEOF_SIZE_T - #define __Pyx_sst_abs(value) abs(value) -#elif SIZEOF_LONG >= SIZEOF_SIZE_T - #define __Pyx_sst_abs(value) labs(value) -#elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L - #define __Pyx_sst_abs(value) llabs(value) -#elif defined (_MSC_VER) && defined (_M_X64) - #define __Pyx_sst_abs(value) _abs64(value) -#elif defined (__GNUC__) - #define __Pyx_sst_abs(value) __builtin_llabs(value) -#else - #define __Pyx_sst_abs(value) ((value<0) ? 
-value : value) -#endif static CYTHON_INLINE char* __Pyx_PyObject_AsString(PyObject*); static CYTHON_INLINE char* __Pyx_PyObject_AsStringAndSize(PyObject*, Py_ssize_t* length); #define __Pyx_PyByteArray_FromString(s) PyByteArray_FromStringAndSize((const char*)s, strlen((const char*)s)) @@ -341,9 +338,8 @@ static CYTHON_INLINE size_t __Pyx_Py_UNICODE_strlen(const Py_UNICODE *u) #define __Pyx_PyUnicode_FromUnicode(u) PyUnicode_FromUnicode(u, __Pyx_Py_UNICODE_strlen(u)) #define __Pyx_PyUnicode_FromUnicodeAndLength PyUnicode_FromUnicode #define __Pyx_PyUnicode_AsUnicode PyUnicode_AsUnicode -#define __Pyx_NewRef(obj) (Py_INCREF(obj), obj) -#define __Pyx_Owned_Py_None(b) __Pyx_NewRef(Py_None) -#define __Pyx_PyBool_FromLong(b) ((b) ? __Pyx_NewRef(Py_True) : __Pyx_NewRef(Py_False)) +#define __Pyx_Owned_Py_None(b) (Py_INCREF(Py_None), Py_None) +#define __Pyx_PyBool_FromLong(b) ((b) ? (Py_INCREF(Py_True), Py_True) : (Py_INCREF(Py_False), Py_False)) static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject*); static CYTHON_INLINE PyObject* __Pyx_PyNumber_Int(PyObject* x); static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject*); @@ -477,17 +473,17 @@ static const char *__pyx_f[] = { "type.pxd", }; -/* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":725 +/* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":726 * # in Cython to enable them only on the right systems. 
- * + * * ctypedef npy_int8 int8_t # <<<<<<<<<<<<<< * ctypedef npy_int16 int16_t * ctypedef npy_int32 int32_t */ typedef npy_int8 __pyx_t_5numpy_int8_t; -/* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":726 - * +/* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":727 + * * ctypedef npy_int8 int8_t * ctypedef npy_int16 int16_t # <<<<<<<<<<<<<< * ctypedef npy_int32 int32_t @@ -495,7 +491,7 @@ typedef npy_int8 __pyx_t_5numpy_int8_t; */ typedef npy_int16 __pyx_t_5numpy_int16_t; -/* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":727 +/* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":728 * ctypedef npy_int8 int8_t * ctypedef npy_int16 int16_t * ctypedef npy_int32 int32_t # <<<<<<<<<<<<<< @@ -504,7 +500,7 @@ typedef npy_int16 __pyx_t_5numpy_int16_t; */ typedef npy_int32 __pyx_t_5numpy_int32_t; -/* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":728 +/* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":729 * ctypedef npy_int16 int16_t * ctypedef npy_int32 int32_t * ctypedef npy_int64 int64_t # <<<<<<<<<<<<<< @@ -513,17 +509,17 @@ typedef npy_int32 __pyx_t_5numpy_int32_t; */ typedef npy_int64 __pyx_t_5numpy_int64_t; -/* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":732 +/* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":733 * #ctypedef npy_int128 int128_t - * + * * ctypedef npy_uint8 uint8_t # <<<<<<<<<<<<<< * ctypedef npy_uint16 uint16_t * ctypedef npy_uint32 uint32_t */ typedef npy_uint8 
__pyx_t_5numpy_uint8_t; -/* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":733 - * +/* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":734 + * * ctypedef npy_uint8 uint8_t * ctypedef npy_uint16 uint16_t # <<<<<<<<<<<<<< * ctypedef npy_uint32 uint32_t @@ -531,7 +527,7 @@ typedef npy_uint8 __pyx_t_5numpy_uint8_t; */ typedef npy_uint16 __pyx_t_5numpy_uint16_t; -/* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":734 +/* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":735 * ctypedef npy_uint8 uint8_t * ctypedef npy_uint16 uint16_t * ctypedef npy_uint32 uint32_t # <<<<<<<<<<<<<< @@ -540,7 +536,7 @@ typedef npy_uint16 __pyx_t_5numpy_uint16_t; */ typedef npy_uint32 __pyx_t_5numpy_uint32_t; -/* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":735 +/* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":736 * ctypedef npy_uint16 uint16_t * ctypedef npy_uint32 uint32_t * ctypedef npy_uint64 uint64_t # <<<<<<<<<<<<<< @@ -549,17 +545,17 @@ typedef npy_uint32 __pyx_t_5numpy_uint32_t; */ typedef npy_uint64 __pyx_t_5numpy_uint64_t; -/* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":739 +/* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":740 * #ctypedef npy_uint128 uint128_t - * + * * ctypedef npy_float32 float32_t # <<<<<<<<<<<<<< * ctypedef npy_float64 float64_t * #ctypedef npy_float80 float80_t */ typedef npy_float32 __pyx_t_5numpy_float32_t; -/* 
"../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":740 - * +/* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":741 + * * ctypedef npy_float32 float32_t * ctypedef npy_float64 float64_t # <<<<<<<<<<<<<< * #ctypedef npy_float80 float80_t @@ -567,7 +563,7 @@ typedef npy_float32 __pyx_t_5numpy_float32_t; */ typedef npy_float64 __pyx_t_5numpy_float64_t; -/* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":749 +/* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":750 * # The int types are mapped a bit surprising -- * # numpy.int corresponds to 'l' and numpy.long to 'q' * ctypedef npy_long int_t # <<<<<<<<<<<<<< @@ -576,101 +572,101 @@ typedef npy_float64 __pyx_t_5numpy_float64_t; */ typedef npy_long __pyx_t_5numpy_int_t; -/* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":750 +/* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":751 * # numpy.int corresponds to 'l' and numpy.long to 'q' * ctypedef npy_long int_t * ctypedef npy_longlong long_t # <<<<<<<<<<<<<< * ctypedef npy_longlong longlong_t - * + * */ typedef npy_longlong __pyx_t_5numpy_long_t; -/* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":751 +/* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":752 * ctypedef npy_long int_t * ctypedef npy_longlong long_t * ctypedef npy_longlong longlong_t # <<<<<<<<<<<<<< - * + * * ctypedef npy_ulong uint_t */ typedef npy_longlong __pyx_t_5numpy_longlong_t; -/* 
"../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":753 +/* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":754 * ctypedef npy_longlong longlong_t - * + * * ctypedef npy_ulong uint_t # <<<<<<<<<<<<<< * ctypedef npy_ulonglong ulong_t * ctypedef npy_ulonglong ulonglong_t */ typedef npy_ulong __pyx_t_5numpy_uint_t; -/* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":754 - * +/* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":755 + * * ctypedef npy_ulong uint_t * ctypedef npy_ulonglong ulong_t # <<<<<<<<<<<<<< * ctypedef npy_ulonglong ulonglong_t - * + * */ typedef npy_ulonglong __pyx_t_5numpy_ulong_t; -/* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":755 +/* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":756 * ctypedef npy_ulong uint_t * ctypedef npy_ulonglong ulong_t * ctypedef npy_ulonglong ulonglong_t # <<<<<<<<<<<<<< - * + * * ctypedef npy_intp intp_t */ typedef npy_ulonglong __pyx_t_5numpy_ulonglong_t; -/* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":757 +/* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":758 * ctypedef npy_ulonglong ulonglong_t - * + * * ctypedef npy_intp intp_t # <<<<<<<<<<<<<< * ctypedef npy_uintp uintp_t - * + * */ typedef npy_intp __pyx_t_5numpy_intp_t; -/* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":758 - * +/* 
"../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":759 + * * ctypedef npy_intp intp_t * ctypedef npy_uintp uintp_t # <<<<<<<<<<<<<< - * + * * ctypedef npy_double float_t */ typedef npy_uintp __pyx_t_5numpy_uintp_t; -/* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":760 +/* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":761 * ctypedef npy_uintp uintp_t - * + * * ctypedef npy_double float_t # <<<<<<<<<<<<<< * ctypedef npy_double double_t * ctypedef npy_longdouble longdouble_t */ typedef npy_double __pyx_t_5numpy_float_t; -/* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":761 - * +/* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":762 + * * ctypedef npy_double float_t * ctypedef npy_double double_t # <<<<<<<<<<<<<< * ctypedef npy_longdouble longdouble_t - * + * */ typedef npy_double __pyx_t_5numpy_double_t; -/* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":762 +/* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":763 * ctypedef npy_double float_t * ctypedef npy_double double_t * ctypedef npy_longdouble longdouble_t # <<<<<<<<<<<<<< - * + * * ctypedef npy_cfloat cfloat_t */ typedef npy_longdouble __pyx_t_5numpy_longdouble_t; /* "gensim/models/word2vec_inner.pxd":12 - * + * * cimport numpy as np * ctypedef np.float32_t REAL_t # <<<<<<<<<<<<<< - * + * * # BLAS routine signatures */ typedef __pyx_t_5numpy_float32_t __pyx_t_6gensim_6models_14word2vec_inner_REAL_t; @@ -697,44 +693,44 @@ typedef __pyx_t_5numpy_float32_t __pyx_t_6gensim_6models_14word2vec_inner_REAL_t /*--- Type 
declarations ---*/ -/* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":764 +/* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":765 * ctypedef npy_longdouble longdouble_t - * + * * ctypedef npy_cfloat cfloat_t # <<<<<<<<<<<<<< * ctypedef npy_cdouble cdouble_t * ctypedef npy_clongdouble clongdouble_t */ typedef npy_cfloat __pyx_t_5numpy_cfloat_t; -/* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":765 - * +/* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":766 + * * ctypedef npy_cfloat cfloat_t * ctypedef npy_cdouble cdouble_t # <<<<<<<<<<<<<< * ctypedef npy_clongdouble clongdouble_t - * + * */ typedef npy_cdouble __pyx_t_5numpy_cdouble_t; -/* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":766 +/* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":767 * ctypedef npy_cfloat cfloat_t * ctypedef npy_cdouble cdouble_t * ctypedef npy_clongdouble clongdouble_t # <<<<<<<<<<<<<< - * + * * ctypedef npy_cdouble complex_t */ typedef npy_clongdouble __pyx_t_5numpy_clongdouble_t; -/* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":768 +/* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":769 * ctypedef npy_clongdouble clongdouble_t - * + * * ctypedef npy_cdouble complex_t # <<<<<<<<<<<<<< - * + * * cdef inline object PyArray_MultiIterNew1(a): */ typedef npy_cdouble __pyx_t_5numpy_complex_t; /* "gensim/models/word2vec_inner.pxd":15 - * + * * # BLAS routine signatures * ctypedef void (*scopy_ptr) (const int *N, const float 
*X, const int *incX, float *Y, const int *incY) nogil # <<<<<<<<<<<<<< * ctypedef void (*saxpy_ptr) (const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY) nogil @@ -774,7 +770,7 @@ typedef double (*__pyx_t_6gensim_6models_14word2vec_inner_dsdot_ptr)(int const * * ctypedef double (*dsdot_ptr) (const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil * ctypedef double (*snrm2_ptr) (const int *N, const float *X, const int *incX) nogil # <<<<<<<<<<<<<< * ctypedef void (*sscal_ptr) (const int *N, const float *alpha, const float *X, const int *incX) nogil - * + * */ typedef double (*__pyx_t_6gensim_6models_14word2vec_inner_snrm2_ptr)(int const *, float const *, int const *); @@ -782,17 +778,17 @@ typedef double (*__pyx_t_6gensim_6models_14word2vec_inner_snrm2_ptr)(int const * * ctypedef double (*dsdot_ptr) (const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil * ctypedef double (*snrm2_ptr) (const int *N, const float *X, const int *incX) nogil * ctypedef void (*sscal_ptr) (const int *N, const float *alpha, const float *X, const int *incX) nogil # <<<<<<<<<<<<<< - * + * * cdef scopy_ptr scopy */ typedef void (*__pyx_t_6gensim_6models_14word2vec_inner_sscal_ptr)(int const *, float const *, float const *, int const *); /* "gensim/models/word2vec_inner.pxd":35 - * + * * # function implementations swapped based on BLAS detected in word2vec_inner.pyx init() * ctypedef REAL_t (*our_dot_ptr) (const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil # <<<<<<<<<<<<<< * ctypedef void (*our_saxpy_ptr) (const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY) nogil - * + * */ typedef __pyx_t_6gensim_6models_14word2vec_inner_REAL_t (*__pyx_t_6gensim_6models_14word2vec_inner_our_dot_ptr)(int const *, float const *, int const *, float const *, int const *); @@ -800,7 +796,7 @@ typedef 
__pyx_t_6gensim_6models_14word2vec_inner_REAL_t (*__pyx_t_6gensim_6model * # function implementations swapped based on BLAS detected in word2vec_inner.pyx init() * ctypedef REAL_t (*our_dot_ptr) (const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil * ctypedef void (*our_saxpy_ptr) (const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY) nogil # <<<<<<<<<<<<<< - * + * * cdef our_dot_ptr our_dot */ typedef void (*__pyx_t_6gensim_6models_14word2vec_inner_our_saxpy_ptr)(int const *, float const *, float const *, int const *, float *, int const *); @@ -822,19 +818,19 @@ typedef void (*__pyx_t_6gensim_6models_14word2vec_inner_our_saxpy_ptr)(int const static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname); #define __Pyx_RefNannyDeclarations void *__pyx_refnanny = NULL; #ifdef WITH_THREAD - #define __Pyx_RefNannySetupContext(name, acquire_gil)\ - if (acquire_gil) {\ - PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure();\ - __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__);\ - PyGILState_Release(__pyx_gilstate_save);\ - } else {\ - __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__);\ + #define __Pyx_RefNannySetupContext(name, acquire_gil) \ + if (acquire_gil) { \ + PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure(); \ + __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__); \ + PyGILState_Release(__pyx_gilstate_save); \ + } else { \ + __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__); \ } #else - #define __Pyx_RefNannySetupContext(name, acquire_gil)\ + #define __Pyx_RefNannySetupContext(name, acquire_gil) \ __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__) #endif - #define __Pyx_RefNannyFinishContext()\ + #define __Pyx_RefNannyFinishContext() \ __Pyx_RefNanny->FinishContext(&__pyx_refnanny) #define __Pyx_INCREF(r) __Pyx_RefNanny->INCREF(__pyx_refnanny, 
(PyObject *)(r), __LINE__) #define __Pyx_DECREF(r) __Pyx_RefNanny->DECREF(__pyx_refnanny, (PyObject *)(r), __LINE__) @@ -857,13 +853,13 @@ typedef void (*__pyx_t_6gensim_6models_14word2vec_inner_our_saxpy_ptr)(int const #define __Pyx_XGOTREF(r) #define __Pyx_XGIVEREF(r) #endif -#define __Pyx_XDECREF_SET(r, v) do {\ - PyObject *tmp = (PyObject *) r;\ - r = v; __Pyx_XDECREF(tmp);\ +#define __Pyx_XDECREF_SET(r, v) do { \ + PyObject *tmp = (PyObject *) r; \ + r = v; __Pyx_XDECREF(tmp); \ } while (0) -#define __Pyx_DECREF_SET(r, v) do {\ - PyObject *tmp = (PyObject *) r;\ - r = v; __Pyx_DECREF(tmp);\ +#define __Pyx_DECREF_SET(r, v) do { \ + PyObject *tmp = (PyObject *) r; \ + r = v; __Pyx_DECREF(tmp); \ } while (0) #define __Pyx_CLEAR(r) do { PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);} while(0) #define __Pyx_XCLEAR(r) do { if((r) != NULL) {PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);}} while(0) @@ -890,8 +886,8 @@ static void __Pyx_RaiseArgtupleInvalid(const char* func_name, int exact, static void __Pyx_RaiseDoubleKeywordsError(const char* func_name, PyObject* kw_name); -static int __Pyx_ParseOptionalKeywords(PyObject *kwds, PyObject **argnames[],\ - PyObject *kwds2, PyObject *values[], Py_ssize_t num_pos_args,\ +static int __Pyx_ParseOptionalKeywords(PyObject *kwds, PyObject **argnames[], \ + PyObject *kwds2, PyObject *values[], Py_ssize_t num_pos_args, \ const char* function_name); static CYTHON_INLINE int __Pyx_TypeTest(PyObject *obj, PyTypeObject *type); @@ -902,18 +898,11 @@ static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg #define __Pyx_PyObject_Call(func, arg, kw) PyObject_Call(func, arg, kw) #endif -static CYTHON_INLINE int __Pyx_PySequence_ContainsTF(PyObject* item, PyObject* seq, int eq) { +static CYTHON_INLINE int __Pyx_PySequence_Contains(PyObject* item, PyObject* seq, int eq) { int result = PySequence_Contains(seq, item); return unlikely(result < 0) ? 
result : (result == (eq == Py_EQ)); } -#if CYTHON_COMPILING_IN_CPYTHON -static PyObject* __Pyx_PyInt_AddObjC(PyObject *op1, PyObject *op2, long intval, int inplace); -#else -#define __Pyx_PyInt_AddObjC(op1, op2, intval, inplace)\ - (inplace ? PyNumber_InPlaceAdd(op1, op2) : PyNumber_Add(op1, op2)) -#endif - static CYTHON_INLINE PyObject* __Pyx_PyObject_GetSlice( PyObject* obj, Py_ssize_t cstart, Py_ssize_t cstop, PyObject** py_start, PyObject** py_stop, PyObject** py_slice, @@ -950,8 +939,6 @@ static CYTHON_INLINE void __Pyx_RaiseNeedMoreValuesError(Py_ssize_t index); static CYTHON_INLINE void __Pyx_RaiseNoneNotIterableError(void); -static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level); - static PyObject* __Pyx_ImportFrom(PyObject* module, PyObject* name); static CYTHON_INLINE void __Pyx_ExceptionSave(PyObject **type, PyObject **value, PyObject **tb); @@ -990,6 +977,8 @@ static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object); static void __Pyx_AddTraceback(const char *funcname, int c_line, int py_line, const char *filename); +static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level); + static CYTHON_INLINE PyObject* __Pyx_PyInt_From_int(int value); static CYTHON_INLINE PY_LONG_LONG __Pyx_PyInt_As_PY_LONG_LONG(PyObject *); @@ -1008,7 +997,7 @@ static CYTHON_INLINE npy_uint32 __Pyx_PyInt_As_npy_uint32(PyObject *); #define __PYX_FORCE_INIT_THREADS 0 #endif -static CYTHON_INLINE long __Pyx_pow_long(long, long); +static CYTHON_INLINE long __Pyx_pow_long(long, long); /* proto */ #if CYTHON_CCOMPLEX #ifdef __cplusplus @@ -1108,8 +1097,6 @@ static CYTHON_INLINE __pyx_t_double_complex __pyx_t_double_complex_from_parts(do #endif #endif -static CYTHON_INLINE PyObject* __Pyx_PyInt_From_enum__NPY_TYPES(enum NPY_TYPES value); - static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *); static int __Pyx_check_binary_version(void); @@ -1152,21 +1139,19 @@ static int __Pyx_InitStrings(__Pyx_StringTabEntry *t); /* 
Module declarations from 'cpython.buffer' */ +/* Module declarations from 'cpython.ref' */ + /* Module declarations from 'libc.string' */ /* Module declarations from 'libc.stdio' */ +/* Module declarations from 'cpython.object' */ + /* Module declarations from '__builtin__' */ /* Module declarations from 'cpython.type' */ static PyTypeObject *__pyx_ptype_7cpython_4type_type = 0; -/* Module declarations from 'cpython' */ - -/* Module declarations from 'cpython.object' */ - -/* Module declarations from 'cpython.ref' */ - /* Module declarations from 'libc.stdlib' */ /* Module declarations from 'numpy' */ @@ -1190,10 +1175,10 @@ static __pyx_t_6gensim_6models_14word2vec_inner_sdot_ptr __pyx_v_6gensim_6models static __pyx_t_6gensim_6models_14word2vec_inner_dsdot_ptr __pyx_v_6gensim_6models_14word2vec_inner_dsdot; static __pyx_t_6gensim_6models_14word2vec_inner_snrm2_ptr __pyx_v_6gensim_6models_14word2vec_inner_snrm2; static __pyx_t_6gensim_6models_14word2vec_inner_sscal_ptr __pyx_v_6gensim_6models_14word2vec_inner_sscal; -static __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[0x3E8]; +static __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[1000]; static __pyx_t_6gensim_6models_14word2vec_inner_our_dot_ptr __pyx_v_6gensim_6models_14word2vec_inner_our_dot; static __pyx_t_6gensim_6models_14word2vec_inner_our_saxpy_ptr __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy; -static __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_6gensim_6models_14word2vec_inner_LOG_TABLE[0x3E8]; +static __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_6gensim_6models_14word2vec_inner_LOG_TABLE[1000]; static int __pyx_v_6gensim_6models_14word2vec_inner_ONE; static __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_6gensim_6models_14word2vec_inner_ONEF; static __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_f_6gensim_6models_14word2vec_inner_our_dot_double(int const *, 
float const *, int const *, float const *, int const *); /*proto*/ @@ -1217,6 +1202,14 @@ static PyObject *__pyx_builtin_range; static PyObject *__pyx_builtin_enumerate; static PyObject *__pyx_builtin_ValueError; static PyObject *__pyx_builtin_RuntimeError; +static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_train_sentence_sg(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_sentence, PyObject *__pyx_v_alpha, PyObject *__pyx_v__work); /* proto */ +static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_batch_sg(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_sentences, PyObject *__pyx_v_alpha, PyObject *__pyx_v_sentence_indeces, PyObject *__pyx_v__work); /* proto */ +static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4train_sentence_cbow(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_sentence, PyObject *__pyx_v_alpha, PyObject *__pyx_v__work, PyObject *__pyx_v__neu1); /* proto */ +static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_6score_sentence_sg(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_sentence, PyObject *__pyx_v__work); /* proto */ +static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_8score_sentence_cbow(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_sentence, PyObject *__pyx_v__work, PyObject *__pyx_v__neu1); /* proto */ +static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_10init(CYTHON_UNUSED PyObject *__pyx_self); /* proto */ +static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags); /* proto */ +static void __pyx_pf_5numpy_7ndarray_2__releasebuffer__(PyArrayObject *__pyx_v_self, Py_buffer *__pyx_v_info); /* proto */ static char __pyx_k_B[] = "B"; static char __pyx_k_H[] = "H"; static char __pyx_k_I[] = "I"; @@ -1321,12 +1314,14 @@ static char __pyx_k_sentence_indeces[] = 
"sentence_indeces"; static char __pyx_k_scipy_linalg_blas[] = "scipy.linalg.blas"; static char __pyx_k_score_sentence_sg[] = "score_sentence_sg"; static char __pyx_k_train_sentence_sg[] = "train_sentence_sg"; +static char __pyx_k_MAX_WORDS_IN_BATCH[] = "MAX_WORDS_IN_BATCH"; static char __pyx_k_sentence_indeces_c[] = "sentence_indeces_c"; +static char __pyx_k_MAX_BATCH_SENTENCES[] = "MAX_BATCH_SENTENCES"; static char __pyx_k_score_sentence_cbow[] = "score_sentence_cbow"; static char __pyx_k_train_sentence_cbow[] = "train_sentence_cbow"; static char __pyx_k_ndarray_is_not_C_contiguous[] = "ndarray is not C contiguous"; static char __pyx_k_gensim_models_word2vec_inner[] = "gensim.models.word2vec_inner"; -static char __pyx_k_home_olavur_RaRe_w2v_batch_sent[] = "/home/olavur/RaRe/w2v_batch_sentences/gensim/gensim/models/word2vec_inner.pyx"; +static char __pyx_k_Volumes_work_workspace_gensim_t[] = "/Volumes/work/workspace/gensim/trunk/gensim/models/word2vec_inner.pyx"; static char __pyx_k_unknown_dtype_code_in_numpy_pxd[] = "unknown dtype code in numpy.pxd (%d)"; static char __pyx_k_Format_string_allocated_too_shor[] = "Format string allocated too short, see comment in numpy.pxd"; static char __pyx_k_Non_native_byte_order_not_suppor[] = "Non-native byte order not supported"; @@ -1337,10 +1332,13 @@ static PyObject *__pyx_n_s_FAST_VERSION; static PyObject *__pyx_kp_u_Format_string_allocated_too_shor; static PyObject *__pyx_kp_u_Format_string_allocated_too_shor_2; static PyObject *__pyx_n_s_ImportError; +static PyObject *__pyx_n_s_MAX_BATCH_SENTENCES; +static PyObject *__pyx_n_s_MAX_WORDS_IN_BATCH; static PyObject *__pyx_kp_u_Non_native_byte_order_not_suppor; static PyObject *__pyx_n_s_REAL; static PyObject *__pyx_n_s_RuntimeError; static PyObject *__pyx_n_s_ValueError; +static PyObject *__pyx_kp_s_Volumes_work_workspace_gensim_t; static PyObject *__pyx_n_s__13; static PyObject *__pyx_n_s_alpha; static PyObject *__pyx_n_s_alpha_2; @@ -1358,7 +1356,6 @@ static PyObject 
*__pyx_n_s_expected; static PyObject *__pyx_n_s_fblas; static PyObject *__pyx_n_s_float32; static PyObject *__pyx_n_s_gensim_models_word2vec_inner; -static PyObject *__pyx_kp_s_home_olavur_RaRe_w2v_batch_sent; static PyObject *__pyx_n_s_hs; static PyObject *__pyx_n_s_i; static PyObject *__pyx_n_s_idx; @@ -1431,17 +1428,11 @@ static PyObject *__pyx_n_s_work; static PyObject *__pyx_n_s_work_2; static PyObject *__pyx_n_s_x; static PyObject *__pyx_n_s_y; -static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_train_sentence_sg(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_sentence, PyObject *__pyx_v_alpha, PyObject *__pyx_v__work); /* proto */ -static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_batch_sg(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_sentences, PyObject *__pyx_v_alpha, PyObject *__pyx_v_sentence_indeces, PyObject *__pyx_v__work); /* proto */ -static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4train_sentence_cbow(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_sentence, PyObject *__pyx_v_alpha, PyObject *__pyx_v__work, PyObject *__pyx_v__neu1); /* proto */ -static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_6score_sentence_sg(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_sentence, PyObject *__pyx_v__work); /* proto */ -static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_8score_sentence_cbow(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_sentence, PyObject *__pyx_v__work, PyObject *__pyx_v__neu1); /* proto */ -static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_10init(CYTHON_UNUSED PyObject *__pyx_self); /* proto */ -static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags); /* proto */ -static void __pyx_pf_5numpy_7ndarray_2__releasebuffer__(PyArrayObject *__pyx_v_self, Py_buffer 
*__pyx_v_info); /* proto */ static PyObject *__pyx_int_0; static PyObject *__pyx_int_1; static PyObject *__pyx_int_2; +static PyObject *__pyx_int_1000; +static PyObject *__pyx_int_100000; static PyObject *__pyx_int_16777216; static PyObject *__pyx_tuple_; static PyObject *__pyx_tuple__2; @@ -1469,11 +1460,11 @@ static PyObject *__pyx_codeobj__23; static PyObject *__pyx_codeobj__25; /* "gensim/models/word2vec_inner.pyx":47 - * + * * # for when fblas.sdot returns a double * cdef REAL_t our_dot_double(const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil: # <<<<<<<<<<<<<< * return dsdot(N, X, incX, Y, incY) - * + * */ static __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_f_6gensim_6models_14word2vec_inner_our_dot_double(int const *__pyx_v_N, float const *__pyx_v_X, int const *__pyx_v_incX, float const *__pyx_v_Y, int const *__pyx_v_incY) { @@ -1483,18 +1474,18 @@ static __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_f_6gensim_6models_1 * # for when fblas.sdot returns a double * cdef REAL_t our_dot_double(const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil: * return dsdot(N, X, incX, Y, incY) # <<<<<<<<<<<<<< - * + * * # for when fblas.sdot returns a float */ __pyx_r = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)__pyx_v_6gensim_6models_14word2vec_inner_dsdot(__pyx_v_N, __pyx_v_X, __pyx_v_incX, __pyx_v_Y, __pyx_v_incY)); goto __pyx_L0; /* "gensim/models/word2vec_inner.pyx":47 - * + * * # for when fblas.sdot returns a double * cdef REAL_t our_dot_double(const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil: # <<<<<<<<<<<<<< * return dsdot(N, X, incX, Y, incY) - * + * */ /* function exit code */ @@ -1503,11 +1494,11 @@ static __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_f_6gensim_6models_1 } /* "gensim/models/word2vec_inner.pyx":51 - * + * * # for when fblas.sdot returns a float * cdef REAL_t our_dot_float(const int *N, const float *X, const int 
*incX, const float *Y, const int *incY) nogil: # <<<<<<<<<<<<<< * return sdot(N, X, incX, Y, incY) - * + * */ static __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_f_6gensim_6models_14word2vec_inner_our_dot_float(int const *__pyx_v_N, float const *__pyx_v_X, int const *__pyx_v_incX, float const *__pyx_v_Y, int const *__pyx_v_incY) { @@ -1517,18 +1508,18 @@ static __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_f_6gensim_6models_1 * # for when fblas.sdot returns a float * cdef REAL_t our_dot_float(const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil: * return sdot(N, X, incX, Y, incY) # <<<<<<<<<<<<<< - * + * * # for when no blas available */ __pyx_r = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)__pyx_v_6gensim_6models_14word2vec_inner_sdot(__pyx_v_N, __pyx_v_X, __pyx_v_incX, __pyx_v_Y, __pyx_v_incY)); goto __pyx_L0; /* "gensim/models/word2vec_inner.pyx":51 - * + * * # for when fblas.sdot returns a float * cdef REAL_t our_dot_float(const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil: # <<<<<<<<<<<<<< * return sdot(N, X, incX, Y, incY) - * + * */ /* function exit code */ @@ -1537,7 +1528,7 @@ static __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_f_6gensim_6models_1 } /* "gensim/models/word2vec_inner.pyx":55 - * + * * # for when no blas available * cdef REAL_t our_dot_noblas(const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil: # <<<<<<<<<<<<<< * # not a true full dot()-implementation: just enough for our cases @@ -1574,7 +1565,7 @@ static __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_f_6gensim_6models_1 * for i from 0 <= i < N[0] by 1: * a += X[i] * Y[i] # <<<<<<<<<<<<<< * return a - * + * */ __pyx_v_a = (__pyx_v_a + ((__pyx_v_X[__pyx_v_i]) * (__pyx_v_Y[__pyx_v_i]))); } @@ -1583,14 +1574,14 @@ static __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_f_6gensim_6models_1 * for i from 0 <= i < N[0] by 1: * a += X[i] * Y[i] * return a 
# <<<<<<<<<<<<<< - * + * * # for when no blas available */ __pyx_r = __pyx_v_a; goto __pyx_L0; /* "gensim/models/word2vec_inner.pyx":55 - * + * * # for when no blas available * cdef REAL_t our_dot_noblas(const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil: # <<<<<<<<<<<<<< * # not a true full dot()-implementation: just enough for our cases @@ -1603,7 +1594,7 @@ static __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_f_6gensim_6models_1 } /* "gensim/models/word2vec_inner.pyx":65 - * + * * # for when no blas available * cdef void our_saxpy_noblas(const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY) nogil: # <<<<<<<<<<<<<< * cdef int i @@ -1619,7 +1610,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_our_saxpy_noblas(int const * cdef int i * for i from 0 <= i < N[0] by 1: # <<<<<<<<<<<<<< * Y[i * (incY[0])] = (alpha[0]) * X[i * (incX[0])] + Y[i * (incY[0])] - * + * */ __pyx_t_1 = (__pyx_v_N[0]); for (__pyx_v_i = 0; __pyx_v_i < __pyx_t_1; __pyx_v_i+=1) { @@ -1628,14 +1619,14 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_our_saxpy_noblas(int const * cdef int i * for i from 0 <= i < N[0] by 1: * Y[i * (incY[0])] = (alpha[0]) * X[i * (incX[0])] + Y[i * (incY[0])] # <<<<<<<<<<<<<< - * - * + * + * */ (__pyx_v_Y[(__pyx_v_i * (__pyx_v_incY[0]))]) = (((__pyx_v_alpha[0]) * (__pyx_v_X[(__pyx_v_i * (__pyx_v_incX[0]))])) + (__pyx_v_Y[(__pyx_v_i * (__pyx_v_incY[0]))])); } /* "gensim/models/word2vec_inner.pyx":65 - * + * * # for when no blas available * cdef void our_saxpy_noblas(const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY) nogil: # <<<<<<<<<<<<<< * cdef int i @@ -1646,8 +1637,8 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_our_saxpy_noblas(int const } /* "gensim/models/word2vec_inner.pyx":71 - * - * + * + * * cdef void fast_sentence_sg_hs( # <<<<<<<<<<<<<< * const np.uint32_t *word_point, const np.uint8_t *word_code, const int 
codelen, * REAL_t *syn0, REAL_t *syn1, const int size, @@ -1665,17 +1656,17 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_sg_hs(__pyx_t int __pyx_t_4; /* "gensim/models/word2vec_inner.pyx":77 - * + * * cdef long long a, b * cdef long long row1 = word2_index * size, row2 # <<<<<<<<<<<<<< * cdef REAL_t f, g - * + * */ __pyx_v_row1 = (__pyx_v_word2_index * __pyx_v_size); /* "gensim/models/word2vec_inner.pyx":80 * cdef REAL_t f, g - * + * * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< * for b in range(codelen): * row2 = word_point[b] * size @@ -1683,7 +1674,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_sg_hs(__pyx_t memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); /* "gensim/models/word2vec_inner.pyx":81 - * + * * memset(work, 0, size * cython.sizeof(REAL_t)) * for b in range(codelen): # <<<<<<<<<<<<<< * row2 = word_point[b] * size @@ -1737,14 +1728,6 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_sg_hs(__pyx_t * g = (1 - word_code[b] - f) * alpha */ goto __pyx_L3_continue; - - /* "gensim/models/word2vec_inner.pyx":84 - * row2 = word_point[b] * size - * f = our_dot(&size, &syn0[row1], &ONE, &syn1[row2], &ONE) - * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< - * continue - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - */ } /* "gensim/models/word2vec_inner.pyx":86 @@ -1779,7 +1762,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_sg_hs(__pyx_t * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) * our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< * our_saxpy(&size, &word_locks[word2_index], work, &ONE, &syn0[row1], &ONE) - * + * */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), 
(&__pyx_v_6gensim_6models_14word2vec_inner_ONE)); __pyx_L3_continue:; @@ -1789,14 +1772,14 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_sg_hs(__pyx_t * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) * our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1[row2], &ONE) * our_saxpy(&size, &word_locks[word2_index], work, &ONE, &syn0[row1], &ONE) # <<<<<<<<<<<<<< - * - * + * + * */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_word_locks[__pyx_v_word2_index])), __pyx_v_work, (&__pyx_v_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_6gensim_6models_14word2vec_inner_ONE)); /* "gensim/models/word2vec_inner.pyx":71 - * - * + * + * * cdef void fast_sentence_sg_hs( # <<<<<<<<<<<<<< * const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen, * REAL_t *syn0, REAL_t *syn1, const int size, @@ -1806,7 +1789,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_sg_hs(__pyx_t } /* "gensim/models/word2vec_inner.pyx":94 - * + * * # to support random draws from negative-sampling cum_table * cdef inline unsigned long long bisect_left(np.uint32_t *a, unsigned long long x, unsigned long long lo, unsigned long long hi) nogil: # <<<<<<<<<<<<<< * cdef unsigned long long mid @@ -1856,25 +1839,17 @@ static CYTHON_INLINE unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_in * lo = mid + 1 */ __pyx_v_hi = __pyx_v_mid; - - /* "gensim/models/word2vec_inner.pyx":98 - * while hi > lo: - * mid = (lo + hi) >> 1 - * if a[mid] >= x: # <<<<<<<<<<<<<< - * hi = mid - * else: - */ goto __pyx_L5; } + /*else*/ { - /* "gensim/models/word2vec_inner.pyx":101 + /* "gensim/models/word2vec_inner.pyx":101 * hi = mid * else: * lo = mid + 1 # <<<<<<<<<<<<<< * return lo - * + * */ - /*else*/ { __pyx_v_lo = (__pyx_v_mid + 1); } __pyx_L5:; @@ -1884,14 +1859,14 @@ static CYTHON_INLINE unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_in * else: * lo = mid + 1 * return lo # 
<<<<<<<<<<<<<< - * + * * # this quick & dirty RNG apparently matches Java's (non-Secure)Random */ __pyx_r = __pyx_v_lo; goto __pyx_L0; /* "gensim/models/word2vec_inner.pyx":94 - * + * * # to support random draws from negative-sampling cum_table * cdef inline unsigned long long bisect_left(np.uint32_t *a, unsigned long long x, unsigned long long lo, unsigned long long hi) nogil: # <<<<<<<<<<<<<< * cdef unsigned long long mid @@ -1929,7 +1904,7 @@ static CYTHON_INLINE unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_in * cdef unsigned long long this_random = next_random[0] >> 16 * next_random[0] = (next_random[0] * 25214903917ULL + 11) & 281474976710655ULL # <<<<<<<<<<<<<< * return this_random - * + * */ (__pyx_v_next_random[0]) = ((((__pyx_v_next_random[0]) * ((unsigned PY_LONG_LONG)25214903917ULL)) + 11) & 281474976710655ULL); @@ -1937,7 +1912,7 @@ static CYTHON_INLINE unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_in * cdef unsigned long long this_random = next_random[0] >> 16 * next_random[0] = (next_random[0] * 25214903917ULL + 11) & 281474976710655ULL * return this_random # <<<<<<<<<<<<<< - * + * * cdef unsigned long long fast_sentence_sg_neg( */ __pyx_r = __pyx_v_this_random; @@ -1958,7 +1933,7 @@ static CYTHON_INLINE unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_in /* "gensim/models/word2vec_inner.pyx":111 * return this_random - * + * * cdef unsigned long long fast_sentence_sg_neg( # <<<<<<<<<<<<<< * const int negative, np.uint32_t *cum_table, unsigned long long cum_table_len, * REAL_t *syn0, REAL_t *syn1neg, const int size, const np.uint32_t word_index, @@ -1980,7 +1955,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente int __pyx_t_4; /* "gensim/models/word2vec_inner.pyx":118 - * + * * cdef long long a * cdef long long row1 = word2_index * size, row2 # <<<<<<<<<<<<<< * cdef unsigned long long modulo = 281474976710655ULL @@ -1999,16 +1974,16 @@ static unsigned PY_LONG_LONG 
__pyx_f_6gensim_6models_14word2vec_inner_fast_sente /* "gensim/models/word2vec_inner.pyx":124 * cdef int d - * + * * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< - * + * * for d in range(negative+1): */ memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); /* "gensim/models/word2vec_inner.pyx":126 * memset(work, 0, size * cython.sizeof(REAL_t)) - * + * * for d in range(negative+1): # <<<<<<<<<<<<<< * if d == 0: * target_index = word_index @@ -2018,7 +1993,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente __pyx_v_d = __pyx_t_2; /* "gensim/models/word2vec_inner.pyx":127 - * + * * for d in range(negative+1): * if d == 0: # <<<<<<<<<<<<<< * target_index = word_index @@ -2044,25 +2019,17 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) */ __pyx_v_label = __pyx_v_6gensim_6models_14word2vec_inner_ONEF; - - /* "gensim/models/word2vec_inner.pyx":127 - * - * for d in range(negative+1): - * if d == 0: # <<<<<<<<<<<<<< - * target_index = word_index - * label = ONEF - */ goto __pyx_L5; } + /*else*/ { - /* "gensim/models/word2vec_inner.pyx":131 + /* "gensim/models/word2vec_inner.pyx":131 * label = ONEF * else: * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) # <<<<<<<<<<<<<< * next_random = (next_random * 25214903917ULL + 11) & modulo * if target_index == word_index: */ - /*else*/ { __pyx_v_target_index = __pyx_f_6gensim_6models_14word2vec_inner_bisect_left(__pyx_v_cum_table, ((__pyx_v_next_random >> 16) % (__pyx_v_cum_table[(__pyx_v_cum_table_len - 1)])), 0, __pyx_v_cum_table_len); /* "gensim/models/word2vec_inner.pyx":132 @@ -2089,24 +2056,16 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente * if target_index == word_index: * continue # 
<<<<<<<<<<<<<< * label = 0.0 - * + * */ goto __pyx_L3_continue; - - /* "gensim/models/word2vec_inner.pyx":133 - * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) - * next_random = (next_random * 25214903917ULL + 11) & modulo - * if target_index == word_index: # <<<<<<<<<<<<<< - * continue - * label = 0.0 - */ } /* "gensim/models/word2vec_inner.pyx":135 * if target_index == word_index: * continue * label = 0.0 # <<<<<<<<<<<<<< - * + * * row2 = target_index * size */ __pyx_v_label = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)0.0); @@ -2115,7 +2074,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente /* "gensim/models/word2vec_inner.pyx":137 * label = 0.0 - * + * * row2 = target_index * size # <<<<<<<<<<<<<< * f = our_dot(&size, &syn0[row1], &ONE, &syn1neg[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: @@ -2123,7 +2082,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente __pyx_v_row2 = (__pyx_v_target_index * __pyx_v_size); /* "gensim/models/word2vec_inner.pyx":138 - * + * * row2 = target_index * size * f = our_dot(&size, &syn0[row1], &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< * if f <= -MAX_EXP or f >= MAX_EXP: @@ -2157,14 +2116,6 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente * g = (label - f) * alpha */ goto __pyx_L3_continue; - - /* "gensim/models/word2vec_inner.pyx":139 - * row2 = target_index * size - * f = our_dot(&size, &syn0[row1], &ONE, &syn1neg[row2], &ONE) - * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< - * continue - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - */ } /* "gensim/models/word2vec_inner.pyx":141 @@ -2190,7 +2141,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente * g = (label - f) * alpha * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< * our_saxpy(&size, &g, &syn0[row1], &ONE, 
&syn1neg[row2], &ONE) - * + * */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14word2vec_inner_ONE), __pyx_v_work, (&__pyx_v_6gensim_6models_14word2vec_inner_ONE)); @@ -2198,7 +2149,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente * g = (label - f) * alpha * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) * our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< - * + * * our_saxpy(&size, &word_locks[word2_index], work, &ONE, &syn0[row1], &ONE) */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14word2vec_inner_ONE)); @@ -2207,26 +2158,26 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente /* "gensim/models/word2vec_inner.pyx":146 * our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1neg[row2], &ONE) - * + * * our_saxpy(&size, &word_locks[word2_index], work, &ONE, &syn0[row1], &ONE) # <<<<<<<<<<<<<< - * + * * return next_random */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_word_locks[__pyx_v_word2_index])), __pyx_v_work, (&__pyx_v_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_6gensim_6models_14word2vec_inner_ONE)); /* "gensim/models/word2vec_inner.pyx":148 * our_saxpy(&size, &word_locks[word2_index], work, &ONE, &syn0[row1], &ONE) - * + * * return next_random # <<<<<<<<<<<<<< - * - * + * + * */ __pyx_r = __pyx_v_next_random; goto __pyx_L0; /* "gensim/models/word2vec_inner.pyx":111 * return this_random - * + * * cdef unsigned long long fast_sentence_sg_neg( # <<<<<<<<<<<<<< * const int negative, np.uint32_t *cum_table, unsigned long long cum_table_len, * REAL_t *syn0, REAL_t *syn1neg, const int size, const np.uint32_t word_index, @@ 
-2238,8 +2189,8 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente } /* "gensim/models/word2vec_inner.pyx":151 - * - * + * + * * cdef void fast_sentence_cbow_hs( # <<<<<<<<<<<<<< * const np.uint32_t *word_point, const np.uint8_t *word_code, int codelens[MAX_SENTENCE_LEN], * REAL_t *neu1, REAL_t *syn0, REAL_t *syn1, const int size, @@ -2264,13 +2215,13 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs(__pyx * cdef long long row2 * cdef REAL_t f, g, count, inv_count = 1.0 # <<<<<<<<<<<<<< * cdef int m - * + * */ __pyx_v_inv_count = 1.0; /* "gensim/models/word2vec_inner.pyx":162 * cdef int m - * + * * memset(neu1, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< * count = 0.0 * for m in range(j, k): @@ -2278,7 +2229,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs(__pyx memset(__pyx_v_neu1, 0, (__pyx_v_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); /* "gensim/models/word2vec_inner.pyx":163 - * + * * memset(neu1, 0, size * cython.sizeof(REAL_t)) * count = 0.0 # <<<<<<<<<<<<<< * for m in range(j, k): @@ -2315,24 +2266,16 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs(__pyx * count += ONEF */ goto __pyx_L3_continue; - - /* "gensim/models/word2vec_inner.pyx":165 - * count = 0.0 - * for m in range(j, k): - * if m == i: # <<<<<<<<<<<<<< - * continue - * else: - */ } + /*else*/ { - /* "gensim/models/word2vec_inner.pyx":168 + /* "gensim/models/word2vec_inner.pyx":168 * continue * else: * count += ONEF # <<<<<<<<<<<<<< * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) * if count > (0.5): */ - /*else*/ { __pyx_v_count = (__pyx_v_count + __pyx_v_6gensim_6models_14word2vec_inner_ONEF); /* "gensim/models/word2vec_inner.pyx":169 @@ -2365,22 +2308,16 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs(__pyx * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) 
*/ __pyx_v_inv_count = (__pyx_v_6gensim_6models_14word2vec_inner_ONEF / __pyx_v_count); - - /* "gensim/models/word2vec_inner.pyx":170 - * count += ONEF - * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) - * if count > (0.5): # <<<<<<<<<<<<<< - * inv_count = ONEF/count - * if cbow_mean: - */ + goto __pyx_L6; } + __pyx_L6:; /* "gensim/models/word2vec_inner.pyx":172 * if count > (0.5): * inv_count = ONEF/count * if cbow_mean: # <<<<<<<<<<<<<< * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) - * + * */ __pyx_t_3 = (__pyx_v_cbow_mean != 0); if (__pyx_t_3) { @@ -2389,23 +2326,17 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs(__pyx * inv_count = ONEF/count * if cbow_mean: * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) # <<<<<<<<<<<<<< - * + * * memset(work, 0, size * cython.sizeof(REAL_t)) */ __pyx_v_6gensim_6models_14word2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_inv_count), __pyx_v_neu1, (&__pyx_v_6gensim_6models_14word2vec_inner_ONE)); - - /* "gensim/models/word2vec_inner.pyx":172 - * if count > (0.5): - * inv_count = ONEF/count - * if cbow_mean: # <<<<<<<<<<<<<< - * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) - * - */ + goto __pyx_L7; } + __pyx_L7:; /* "gensim/models/word2vec_inner.pyx":175 * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) 
- * + * * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< * for b in range(codelens[i]): * row2 = word_point[b] * size @@ -2413,7 +2344,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs(__pyx memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); /* "gensim/models/word2vec_inner.pyx":176 - * + * * memset(work, 0, size * cython.sizeof(REAL_t)) * for b in range(codelens[i]): # <<<<<<<<<<<<<< * row2 = word_point[b] * size @@ -2467,14 +2398,6 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs(__pyx * g = (1 - word_code[b] - f) * alpha */ goto __pyx_L8_continue; - - /* "gensim/models/word2vec_inner.pyx":179 - * row2 = word_point[b] * size - * f = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) - * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< - * continue - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - */ } /* "gensim/models/word2vec_inner.pyx":181 @@ -2500,7 +2423,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs(__pyx * g = (1 - word_code[b] - f) * alpha * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< * our_saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) - * + * */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14word2vec_inner_ONE), __pyx_v_work, (&__pyx_v_6gensim_6models_14word2vec_inner_ONE)); @@ -2508,7 +2431,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs(__pyx * g = (1 - word_code[b] - f) * alpha * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) * our_saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< - * + * * if not cbow_mean: # divide error over summed window vectors */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), __pyx_v_neu1, (&__pyx_v_6gensim_6models_14word2vec_inner_ONE), 
(&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14word2vec_inner_ONE)); @@ -2517,35 +2440,29 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs(__pyx /* "gensim/models/word2vec_inner.pyx":186 * our_saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) - * + * * if not cbow_mean: # divide error over summed window vectors # <<<<<<<<<<<<<< * sscal(&size, &inv_count, work, &ONE) # (does this need BLAS-variants like saxpy?) - * + * */ __pyx_t_3 = ((!(__pyx_v_cbow_mean != 0)) != 0); if (__pyx_t_3) { /* "gensim/models/word2vec_inner.pyx":187 - * + * * if not cbow_mean: # divide error over summed window vectors * sscal(&size, &inv_count, work, &ONE) # (does this need BLAS-variants like saxpy?) # <<<<<<<<<<<<<< - * + * * for m in range(j, k): */ __pyx_v_6gensim_6models_14word2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_inv_count), __pyx_v_work, (&__pyx_v_6gensim_6models_14word2vec_inner_ONE)); - - /* "gensim/models/word2vec_inner.pyx":186 - * our_saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) - * - * if not cbow_mean: # divide error over summed window vectors # <<<<<<<<<<<<<< - * sscal(&size, &inv_count, work, &ONE) # (does this need BLAS-variants like saxpy?) - * - */ + goto __pyx_L13; } + __pyx_L13:; /* "gensim/models/word2vec_inner.pyx":189 * sscal(&size, &inv_count, work, &ONE) # (does this need BLAS-variants like saxpy?) 
- * + * * for m in range(j, k): # <<<<<<<<<<<<<< * if m == i: * continue @@ -2555,7 +2472,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs(__pyx __pyx_v_m = __pyx_t_2; /* "gensim/models/word2vec_inner.pyx":190 - * + * * for m in range(j, k): * if m == i: # <<<<<<<<<<<<<< * continue @@ -2572,32 +2489,24 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs(__pyx * our_saxpy(&size, &word_locks[indexes[m]], work, &ONE, &syn0[indexes[m] * size], &ONE) */ goto __pyx_L14_continue; - - /* "gensim/models/word2vec_inner.pyx":190 - * - * for m in range(j, k): - * if m == i: # <<<<<<<<<<<<<< - * continue - * else: - */ } + /*else*/ { - /* "gensim/models/word2vec_inner.pyx":193 + /* "gensim/models/word2vec_inner.pyx":193 * continue * else: * our_saxpy(&size, &word_locks[indexes[m]], work, &ONE, &syn0[indexes[m] * size], &ONE) # <<<<<<<<<<<<<< - * - * + * + * */ - /*else*/ { __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_word_locks[(__pyx_v_indexes[__pyx_v_m])])), __pyx_v_work, (&__pyx_v_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn0[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_14word2vec_inner_ONE)); } __pyx_L14_continue:; } /* "gensim/models/word2vec_inner.pyx":151 - * - * + * + * * cdef void fast_sentence_cbow_hs( # <<<<<<<<<<<<<< * const np.uint32_t *word_point, const np.uint8_t *word_code, int codelens[MAX_SENTENCE_LEN], * REAL_t *neu1, REAL_t *syn0, REAL_t *syn1, const int size, @@ -2607,8 +2516,8 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs(__pyx } /* "gensim/models/word2vec_inner.pyx":196 - * - * + * + * * cdef unsigned long long fast_sentence_cbow_neg( # <<<<<<<<<<<<<< * const int negative, np.uint32_t *cum_table, unsigned long long cum_table_len, int codelens[MAX_SENTENCE_LEN], * REAL_t *neu1, REAL_t *syn0, REAL_t *syn1neg, const int size, @@ -2653,16 +2562,16 @@ static unsigned PY_LONG_LONG 
__pyx_f_6gensim_6models_14word2vec_inner_fast_sente /* "gensim/models/word2vec_inner.pyx":209 * cdef int d, m - * + * * word_index = indexes[i] # <<<<<<<<<<<<<< - * + * * memset(neu1, 0, size * cython.sizeof(REAL_t)) */ __pyx_v_word_index = (__pyx_v_indexes[__pyx_v_i]); /* "gensim/models/word2vec_inner.pyx":211 * word_index = indexes[i] - * + * * memset(neu1, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< * count = 0.0 * for m in range(j, k): @@ -2670,7 +2579,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente memset(__pyx_v_neu1, 0, (__pyx_v_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); /* "gensim/models/word2vec_inner.pyx":212 - * + * * memset(neu1, 0, size * cython.sizeof(REAL_t)) * count = 0.0 # <<<<<<<<<<<<<< * for m in range(j, k): @@ -2707,24 +2616,16 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente * count += ONEF */ goto __pyx_L3_continue; - - /* "gensim/models/word2vec_inner.pyx":214 - * count = 0.0 - * for m in range(j, k): - * if m == i: # <<<<<<<<<<<<<< - * continue - * else: - */ } + /*else*/ { - /* "gensim/models/word2vec_inner.pyx":217 + /* "gensim/models/word2vec_inner.pyx":217 * continue * else: * count += ONEF # <<<<<<<<<<<<<< * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) * if count > (0.5): */ - /*else*/ { __pyx_v_count = (__pyx_v_count + __pyx_v_6gensim_6models_14word2vec_inner_ONEF); /* "gensim/models/word2vec_inner.pyx":218 @@ -2757,22 +2658,16 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) 
*/ __pyx_v_inv_count = (__pyx_v_6gensim_6models_14word2vec_inner_ONEF / __pyx_v_count); - - /* "gensim/models/word2vec_inner.pyx":219 - * count += ONEF - * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) - * if count > (0.5): # <<<<<<<<<<<<<< - * inv_count = ONEF/count - * if cbow_mean: - */ + goto __pyx_L6; } + __pyx_L6:; /* "gensim/models/word2vec_inner.pyx":221 * if count > (0.5): * inv_count = ONEF/count * if cbow_mean: # <<<<<<<<<<<<<< * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) - * + * */ __pyx_t_3 = (__pyx_v_cbow_mean != 0); if (__pyx_t_3) { @@ -2781,32 +2676,26 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente * inv_count = ONEF/count * if cbow_mean: * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) # <<<<<<<<<<<<<< - * + * * memset(work, 0, size * cython.sizeof(REAL_t)) */ __pyx_v_6gensim_6models_14word2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_inv_count), __pyx_v_neu1, (&__pyx_v_6gensim_6models_14word2vec_inner_ONE)); - - /* "gensim/models/word2vec_inner.pyx":221 - * if count > (0.5): - * inv_count = ONEF/count - * if cbow_mean: # <<<<<<<<<<<<<< - * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) - * - */ + goto __pyx_L7; } + __pyx_L7:; /* "gensim/models/word2vec_inner.pyx":224 * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) 
- * + * * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< - * + * * for d in range(negative+1): */ memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); /* "gensim/models/word2vec_inner.pyx":226 * memset(work, 0, size * cython.sizeof(REAL_t)) - * + * * for d in range(negative+1): # <<<<<<<<<<<<<< * if d == 0: * target_index = word_index @@ -2816,7 +2705,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente __pyx_v_d = __pyx_t_1; /* "gensim/models/word2vec_inner.pyx":227 - * + * * for d in range(negative+1): * if d == 0: # <<<<<<<<<<<<<< * target_index = word_index @@ -2842,25 +2731,17 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) */ __pyx_v_label = __pyx_v_6gensim_6models_14word2vec_inner_ONEF; - - /* "gensim/models/word2vec_inner.pyx":227 - * - * for d in range(negative+1): - * if d == 0: # <<<<<<<<<<<<<< - * target_index = word_index - * label = ONEF - */ goto __pyx_L10; } + /*else*/ { - /* "gensim/models/word2vec_inner.pyx":231 + /* "gensim/models/word2vec_inner.pyx":231 * label = ONEF * else: * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) # <<<<<<<<<<<<<< * next_random = (next_random * 25214903917ULL + 11) & modulo * if target_index == word_index: */ - /*else*/ { __pyx_v_target_index = __pyx_f_6gensim_6models_14word2vec_inner_bisect_left(__pyx_v_cum_table, ((__pyx_v_next_random >> 16) % (__pyx_v_cum_table[(__pyx_v_cum_table_len - 1)])), 0, __pyx_v_cum_table_len); /* "gensim/models/word2vec_inner.pyx":232 @@ -2887,24 +2768,16 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente * if target_index == word_index: * continue # <<<<<<<<<<<<<< * label = 0.0 - * + * */ goto __pyx_L8_continue; - - /* "gensim/models/word2vec_inner.pyx":233 - * 
target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) - * next_random = (next_random * 25214903917ULL + 11) & modulo - * if target_index == word_index: # <<<<<<<<<<<<<< - * continue - * label = 0.0 - */ } /* "gensim/models/word2vec_inner.pyx":235 * if target_index == word_index: * continue * label = 0.0 # <<<<<<<<<<<<<< - * + * * row2 = target_index * size */ __pyx_v_label = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)0.0); @@ -2913,7 +2786,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente /* "gensim/models/word2vec_inner.pyx":237 * label = 0.0 - * + * * row2 = target_index * size # <<<<<<<<<<<<<< * f = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: @@ -2921,7 +2794,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente __pyx_v_row2 = (__pyx_v_target_index * __pyx_v_size); /* "gensim/models/word2vec_inner.pyx":238 - * + * * row2 = target_index * size * f = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< * if f <= -MAX_EXP or f >= MAX_EXP: @@ -2955,14 +2828,6 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente * g = (label - f) * alpha */ goto __pyx_L8_continue; - - /* "gensim/models/word2vec_inner.pyx":239 - * row2 = target_index * size - * f = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) - * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< - * continue - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - */ } /* "gensim/models/word2vec_inner.pyx":241 @@ -2988,7 +2853,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente * g = (label - f) * alpha * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< * our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) - * + * */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1neg[__pyx_v_row2])), 
(&__pyx_v_6gensim_6models_14word2vec_inner_ONE), __pyx_v_work, (&__pyx_v_6gensim_6models_14word2vec_inner_ONE)); @@ -2996,7 +2861,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente * g = (label - f) * alpha * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) * our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< - * + * * if not cbow_mean: # divide error over summed window vectors */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), __pyx_v_neu1, (&__pyx_v_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14word2vec_inner_ONE)); @@ -3005,35 +2870,29 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente /* "gensim/models/word2vec_inner.pyx":246 * our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) - * + * * if not cbow_mean: # divide error over summed window vectors # <<<<<<<<<<<<<< * sscal(&size, &inv_count, work, &ONE) # (does this need BLAS-variants like saxpy?) - * + * */ __pyx_t_3 = ((!(__pyx_v_cbow_mean != 0)) != 0); if (__pyx_t_3) { /* "gensim/models/word2vec_inner.pyx":247 - * + * * if not cbow_mean: # divide error over summed window vectors * sscal(&size, &inv_count, work, &ONE) # (does this need BLAS-variants like saxpy?) # <<<<<<<<<<<<<< - * + * * for m in range(j,k): */ __pyx_v_6gensim_6models_14word2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_inv_count), __pyx_v_work, (&__pyx_v_6gensim_6models_14word2vec_inner_ONE)); - - /* "gensim/models/word2vec_inner.pyx":246 - * our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) - * - * if not cbow_mean: # divide error over summed window vectors # <<<<<<<<<<<<<< - * sscal(&size, &inv_count, work, &ONE) # (does this need BLAS-variants like saxpy?) - * - */ + goto __pyx_L15; } + __pyx_L15:; /* "gensim/models/word2vec_inner.pyx":249 * sscal(&size, &inv_count, work, &ONE) # (does this need BLAS-variants like saxpy?) 
- * + * * for m in range(j,k): # <<<<<<<<<<<<<< * if m == i: * continue @@ -3043,7 +2902,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente __pyx_v_m = __pyx_t_2; /* "gensim/models/word2vec_inner.pyx":250 - * + * * for m in range(j,k): * if m == i: # <<<<<<<<<<<<<< * continue @@ -3060,24 +2919,16 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente * our_saxpy(&size, &word_locks[indexes[m]], work, &ONE, &syn0[indexes[m]*size], &ONE) */ goto __pyx_L16_continue; - - /* "gensim/models/word2vec_inner.pyx":250 - * - * for m in range(j,k): - * if m == i: # <<<<<<<<<<<<<< - * continue - * else: - */ } + /*else*/ { - /* "gensim/models/word2vec_inner.pyx":253 + /* "gensim/models/word2vec_inner.pyx":253 * continue * else: * our_saxpy(&size, &word_locks[indexes[m]], work, &ONE, &syn0[indexes[m]*size], &ONE) # <<<<<<<<<<<<<< - * + * * return next_random */ - /*else*/ { __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_word_locks[(__pyx_v_indexes[__pyx_v_m])])), __pyx_v_work, (&__pyx_v_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn0[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_14word2vec_inner_ONE)); } __pyx_L16_continue:; @@ -3085,17 +2936,17 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente /* "gensim/models/word2vec_inner.pyx":255 * our_saxpy(&size, &word_locks[indexes[m]], work, &ONE, &syn0[indexes[m]*size], &ONE) - * + * * return next_random # <<<<<<<<<<<<<< - * - * + * + * */ __pyx_r = __pyx_v_next_random; goto __pyx_L0; /* "gensim/models/word2vec_inner.pyx":196 - * - * + * + * * cdef unsigned long long fast_sentence_cbow_neg( # <<<<<<<<<<<<<< * const int negative, np.uint32_t *cum_table, unsigned long long cum_table_len, int codelens[MAX_SENTENCE_LEN], * REAL_t *neu1, REAL_t *syn0, REAL_t *syn1neg, const int size, @@ -3107,8 +2958,8 @@ static unsigned PY_LONG_LONG 
__pyx_f_6gensim_6models_14word2vec_inner_fast_sente } /* "gensim/models/word2vec_inner.pyx":258 - * - * + * + * * def train_sentence_sg(model, sentence, alpha, _work): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative @@ -3203,9 +3054,9 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_train_sentence_sg(CYT __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_work; __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_v__alpha; int __pyx_v_size; - int __pyx_v_codelens[0x2710]; - __pyx_t_5numpy_uint32_t __pyx_v_indexes[0x2710]; - __pyx_t_5numpy_uint32_t __pyx_v_reduced_windows[0x2710]; + int __pyx_v_codelens[100000]; + __pyx_t_5numpy_uint32_t __pyx_v_indexes[100000]; + __pyx_t_5numpy_uint32_t __pyx_v_reduced_windows[100000]; int __pyx_v_sentence_len; int __pyx_v_window; int __pyx_v_i; @@ -3213,8 +3064,8 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_train_sentence_sg(CYT int __pyx_v_k; long __pyx_v_result; __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn1; - __pyx_t_5numpy_uint32_t *__pyx_v_points[0x2710]; - __pyx_t_5numpy_uint8_t *__pyx_v_codes[0x2710]; + __pyx_t_5numpy_uint32_t *__pyx_v_points[100000]; + __pyx_t_5numpy_uint8_t *__pyx_v_codes[100000]; __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn1neg; __pyx_t_5numpy_uint32_t *__pyx_v_cum_table; unsigned PY_LONG_LONG __pyx_v_cum_table_len; @@ -3249,7 +3100,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_train_sentence_sg(CYT __Pyx_RefNannySetupContext("train_sentence_sg", 0); /* "gensim/models/word2vec_inner.pyx":259 - * + * * def train_sentence_sg(model, sentence, alpha, _work): * cdef int hs = model.hs # <<<<<<<<<<<<<< * cdef int negative = model.negative @@ -3266,7 +3117,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_train_sentence_sg(CYT * cdef int hs = model.hs * cdef int negative = model.negative # <<<<<<<<<<<<<< * cdef int sample = (model.sample != 0) - * + * */ __pyx_t_1 = 
__Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 260; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); @@ -3278,7 +3129,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_train_sentence_sg(CYT * cdef int hs = model.hs * cdef int negative = model.negative * cdef int sample = (model.sample != 0) # <<<<<<<<<<<<<< - * + * * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) */ __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_sample); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 261; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -3291,7 +3142,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_train_sentence_sg(CYT /* "gensim/models/word2vec_inner.pyx":263 * cdef int sample = (model.sample != 0) - * + * * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) # <<<<<<<<<<<<<< * cdef REAL_t *word_locks = (np.PyArray_DATA(model.syn0_lockf)) * cdef REAL_t *work @@ -3303,7 +3154,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_train_sentence_sg(CYT __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; /* "gensim/models/word2vec_inner.pyx":264 - * + * * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) * cdef REAL_t *word_locks = (np.PyArray_DATA(model.syn0_lockf)) # <<<<<<<<<<<<<< * cdef REAL_t *work @@ -3320,7 +3171,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_train_sentence_sg(CYT * cdef REAL_t *work * cdef REAL_t _alpha = alpha # <<<<<<<<<<<<<< * cdef int size = model.layer1_size - * + * */ __pyx_t_4 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_4 == (npy_float32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 266; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__alpha = __pyx_t_4; @@ -3329,7 +3180,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_train_sentence_sg(CYT * cdef REAL_t *work * cdef REAL_t _alpha = alpha * 
cdef int size = model.layer1_size # <<<<<<<<<<<<<< - * + * * cdef int codelens[MAX_SENTENCE_LEN] */ __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 267; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -3342,7 +3193,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_train_sentence_sg(CYT * cdef np.uint32_t reduced_windows[MAX_SENTENCE_LEN] * cdef int sentence_len * cdef int window = model.window # <<<<<<<<<<<<<< - * + * * cdef int i, j, k */ __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 273; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -3352,29 +3203,29 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_train_sentence_sg(CYT __pyx_v_window = __pyx_t_2; /* "gensim/models/word2vec_inner.pyx":276 - * + * * cdef int i, j, k * cdef long result = 0 # <<<<<<<<<<<<<< - * + * * # For hierarchical softmax */ __pyx_v_result = 0; /* "gensim/models/word2vec_inner.pyx":290 * cdef unsigned long long next_random - * + * * if hs: # <<<<<<<<<<<<<< * syn1 = (np.PyArray_DATA(model.syn1)) - * + * */ __pyx_t_5 = (__pyx_v_hs != 0); if (__pyx_t_5) { /* "gensim/models/word2vec_inner.pyx":291 - * + * * if hs: * syn1 = (np.PyArray_DATA(model.syn1)) # <<<<<<<<<<<<<< - * + * * if negative: */ __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 291; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -3382,19 +3233,13 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_train_sentence_sg(CYT if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 291; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_syn1 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t 
*)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - - /* "gensim/models/word2vec_inner.pyx":290 - * cdef unsigned long long next_random - * - * if hs: # <<<<<<<<<<<<<< - * syn1 = (np.PyArray_DATA(model.syn1)) - * - */ + goto __pyx_L3; } + __pyx_L3:; /* "gensim/models/word2vec_inner.pyx":293 * syn1 = (np.PyArray_DATA(model.syn1)) - * + * * if negative: # <<<<<<<<<<<<<< * syn1neg = (np.PyArray_DATA(model.syn1neg)) * cum_table = (np.PyArray_DATA(model.cum_table)) @@ -3403,7 +3248,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_train_sentence_sg(CYT if (__pyx_t_5) { /* "gensim/models/word2vec_inner.pyx":294 - * + * * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) # <<<<<<<<<<<<<< * cum_table = (np.PyArray_DATA(model.cum_table)) @@ -3440,22 +3285,16 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_train_sentence_sg(CYT __pyx_t_6 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_6 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 296; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_cum_table_len = __pyx_t_6; - - /* "gensim/models/word2vec_inner.pyx":293 - * syn1 = (np.PyArray_DATA(model.syn1)) - * - * if negative: # <<<<<<<<<<<<<< - * syn1neg = (np.PyArray_DATA(model.syn1neg)) - * cum_table = (np.PyArray_DATA(model.cum_table)) - */ + goto __pyx_L4; } + __pyx_L4:; /* "gensim/models/word2vec_inner.pyx":297 * cum_table = (np.PyArray_DATA(model.cum_table)) * cum_table_len = len(model.cum_table) * if negative or sample: # <<<<<<<<<<<<<< * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) - * + * */ __pyx_t_7 = (__pyx_v_negative != 0); if (!__pyx_t_7) { @@ -3472,7 +3311,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_train_sentence_sg(CYT * cum_table_len = len(model.cum_table) * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # 
<<<<<<<<<<<<<< - * + * * # convert Python structures to primitive types, so we can release the GIL */ __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 298; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -3501,21 +3340,15 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_train_sentence_sg(CYT __pyx_t_9 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_8); if (unlikely((__pyx_t_9 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 298; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; __pyx_v_next_random = __pyx_t_9; - - /* "gensim/models/word2vec_inner.pyx":297 - * cum_table = (np.PyArray_DATA(model.cum_table)) - * cum_table_len = len(model.cum_table) - * if negative or sample: # <<<<<<<<<<<<<< - * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) - * - */ + goto __pyx_L5; } + __pyx_L5:; /* "gensim/models/word2vec_inner.pyx":301 - * + * * # convert Python structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) # <<<<<<<<<<<<<< - * + * * vlookup = model.vocab */ if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 301; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -3523,7 +3356,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_train_sentence_sg(CYT /* "gensim/models/word2vec_inner.pyx":303 * work = np.PyArray_DATA(_work) - * + * * vlookup = model.vocab # <<<<<<<<<<<<<< * i = 0 * for token in sentence: @@ -3534,7 +3367,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_train_sentence_sg(CYT __pyx_t_8 = 0; /* "gensim/models/word2vec_inner.pyx":304 - * + * * vlookup = model.vocab * i = 0 # <<<<<<<<<<<<<< * for token in sentence: @@ -3598,7 +3431,7 @@ static PyObject 
*__pyx_pf_6gensim_6models_14word2vec_inner_train_sentence_sg(CYT * if word is None: * continue # leaving i unchanged/shortening sentence */ - __pyx_t_5 = (__Pyx_PySequence_ContainsTF(__pyx_v_token, __pyx_v_vlookup, Py_EQ)); if (unlikely(__pyx_t_5 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 306; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = (__Pyx_PySequence_Contains(__pyx_v_token, __pyx_v_vlookup, Py_EQ)); if (unlikely(__pyx_t_5 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 306; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if ((__pyx_t_5 != 0)) { __pyx_t_1 = PyObject_GetItem(__pyx_v_vlookup, __pyx_v_token); if (unlikely(__pyx_t_1 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 306; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __Pyx_GOTREF(__pyx_t_1); @@ -3630,14 +3463,6 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_train_sentence_sg(CYT * continue */ goto __pyx_L8_continue; - - /* "gensim/models/word2vec_inner.pyx":307 - * for token in sentence: - * word = vlookup[token] if token in vlookup else None - * if word is None: # <<<<<<<<<<<<<< - * continue # leaving i unchanged/shortening sentence - * if sample and word.sample_int < random_int32(&next_random): - */ } /* "gensim/models/word2vec_inner.pyx":309 @@ -3674,14 +3499,6 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_train_sentence_sg(CYT * if hs: */ goto __pyx_L8_continue; - - /* "gensim/models/word2vec_inner.pyx":309 - * if word is None: - * continue # leaving i unchanged/shortening sentence - * if sample and word.sample_int < random_int32(&next_random): # <<<<<<<<<<<<<< - * continue - * indexes[i] = word.index - */ } /* "gensim/models/word2vec_inner.pyx":311 @@ -3745,15 +3562,9 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_train_sentence_sg(CYT if (!(likely(((__pyx_t_11) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_11, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 315; __pyx_clineno = 
__LINE__; goto __pyx_L1_error;} (__pyx_v_points[__pyx_v_i]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_11))); __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; - - /* "gensim/models/word2vec_inner.pyx":312 - * continue - * indexes[i] = word.index - * if hs: # <<<<<<<<<<<<<< - * codelens[i] = len(word.code) - * codes[i] = np.PyArray_DATA(word.code) - */ - } + goto __pyx_L14; + } + __pyx_L14:; /* "gensim/models/word2vec_inner.pyx":316 * codes[i] = np.PyArray_DATA(word.code) @@ -3780,7 +3591,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_train_sentence_sg(CYT * break # TODO: log warning, tally overflow? * sentence_len = i */ - __pyx_t_7 = ((__pyx_v_i == 0x2710) != 0); + __pyx_t_7 = ((__pyx_v_i == 100000) != 0); if (__pyx_t_7) { /* "gensim/models/word2vec_inner.pyx":319 @@ -3788,17 +3599,9 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_train_sentence_sg(CYT * if i == MAX_SENTENCE_LEN: * break # TODO: log warning, tally overflow? # <<<<<<<<<<<<<< * sentence_len = i - * + * */ goto __pyx_L9_break; - - /* "gensim/models/word2vec_inner.pyx":318 - * result += 1 - * i += 1 - * if i == MAX_SENTENCE_LEN: # <<<<<<<<<<<<<< - * break # TODO: log warning, tally overflow? - * sentence_len = i - */ } /* "gensim/models/word2vec_inner.pyx":305 @@ -3817,17 +3620,17 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_train_sentence_sg(CYT * if i == MAX_SENTENCE_LEN: * break # TODO: log warning, tally overflow? 
* sentence_len = i # <<<<<<<<<<<<<< - * + * * # single randint() call avoids a big thread-sync slowdown */ __pyx_v_sentence_len = __pyx_v_i; /* "gensim/models/word2vec_inner.pyx":323 - * + * * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(model.random.randint(0, window, sentence_len)): # <<<<<<<<<<<<<< * reduced_windows[i] = item - * + * */ __pyx_t_2 = 0; __pyx_t_11 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 323; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -3918,24 +3721,24 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_train_sentence_sg(CYT * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(model.random.randint(0, window, sentence_len)): * reduced_windows[i] = item # <<<<<<<<<<<<<< - * + * * # release GIL & train on the sentence */ __pyx_t_12 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_12 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 324; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_reduced_windows[__pyx_v_i]) = __pyx_t_12; /* "gensim/models/word2vec_inner.pyx":323 - * + * * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(model.random.randint(0, window, sentence_len)): # <<<<<<<<<<<<<< * reduced_windows[i] = item - * + * */ } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; /* "gensim/models/word2vec_inner.pyx":327 - * + * * # release GIL & train on the sentence * with nogil: # <<<<<<<<<<<<<< * for i in range(sentence_len): @@ -3986,15 +3789,9 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_train_sentence_sg(CYT * if k > sentence_len: */ __pyx_v_j = 0; - - /* "gensim/models/word2vec_inner.pyx":330 - * for i in range(sentence_len): - * j = i - window + reduced_windows[i] - * if j < 0: # <<<<<<<<<<<<<< - * j = 0 - * k = i + window + 1 - reduced_windows[i] - */ + goto 
__pyx_L23; } + __pyx_L23:; /* "gensim/models/word2vec_inner.pyx":332 * if j < 0: @@ -4023,15 +3820,9 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_train_sentence_sg(CYT * if j == i: */ __pyx_v_k = __pyx_v_sentence_len; - - /* "gensim/models/word2vec_inner.pyx":333 - * j = 0 - * k = i + window + 1 - reduced_windows[i] - * if k > sentence_len: # <<<<<<<<<<<<<< - * k = sentence_len - * for j in range(j, k): - */ + goto __pyx_L24; } + __pyx_L24:; /* "gensim/models/word2vec_inner.pyx":335 * if k > sentence_len: @@ -4062,14 +3853,6 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_train_sentence_sg(CYT * fast_sentence_sg_hs(points[i], codes[i], codelens[i], syn0, syn1, size, indexes[j], _alpha, work, word_locks) */ goto __pyx_L25_continue; - - /* "gensim/models/word2vec_inner.pyx":336 - * k = sentence_len - * for j in range(j, k): - * if j == i: # <<<<<<<<<<<<<< - * continue - * if hs: - */ } /* "gensim/models/word2vec_inner.pyx":338 @@ -4090,22 +3873,16 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_train_sentence_sg(CYT * next_random = fast_sentence_sg_neg(negative, cum_table, cum_table_len, syn0, syn1neg, size, indexes[i], indexes[j], _alpha, work, next_random, word_locks) */ __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_sg_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), (__pyx_v_codelens[__pyx_v_i]), __pyx_v_syn0, __pyx_v_syn1, __pyx_v_size, (__pyx_v_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v_work, __pyx_v_word_locks); - - /* "gensim/models/word2vec_inner.pyx":338 - * if j == i: - * continue - * if hs: # <<<<<<<<<<<<<< - * fast_sentence_sg_hs(points[i], codes[i], codelens[i], syn0, syn1, size, indexes[j], _alpha, work, word_locks) - * if negative: - */ + goto __pyx_L28; } + __pyx_L28:; /* "gensim/models/word2vec_inner.pyx":340 * if hs: * fast_sentence_sg_hs(points[i], codes[i], codelens[i], syn0, syn1, size, indexes[j], _alpha, work, word_locks) * if negative: # <<<<<<<<<<<<<< * next_random = 
fast_sentence_sg_neg(negative, cum_table, cum_table_len, syn0, syn1neg, size, indexes[i], indexes[j], _alpha, work, next_random, word_locks) - * + * */ __pyx_t_7 = (__pyx_v_negative != 0); if (__pyx_t_7) { @@ -4114,26 +3891,20 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_train_sentence_sg(CYT * fast_sentence_sg_hs(points[i], codes[i], codelens[i], syn0, syn1, size, indexes[j], _alpha, work, word_locks) * if negative: * next_random = fast_sentence_sg_neg(negative, cum_table, cum_table_len, syn0, syn1neg, size, indexes[i], indexes[j], _alpha, work, next_random, word_locks) # <<<<<<<<<<<<<< - * + * * return result */ __pyx_v_next_random = __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_sg_neg(__pyx_v_negative, __pyx_v_cum_table, __pyx_v_cum_table_len, __pyx_v_syn0, __pyx_v_syn1neg, __pyx_v_size, (__pyx_v_indexes[__pyx_v_i]), (__pyx_v_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v_work, __pyx_v_next_random, __pyx_v_word_locks); - - /* "gensim/models/word2vec_inner.pyx":340 - * if hs: - * fast_sentence_sg_hs(points[i], codes[i], codelens[i], syn0, syn1, size, indexes[j], _alpha, work, word_locks) - * if negative: # <<<<<<<<<<<<<< - * next_random = fast_sentence_sg_neg(negative, cum_table, cum_table_len, syn0, syn1neg, size, indexes[i], indexes[j], _alpha, work, next_random, word_locks) - * - */ + goto __pyx_L29; } + __pyx_L29:; __pyx_L25_continue:; } } } /* "gensim/models/word2vec_inner.pyx":327 - * + * * # release GIL & train on the sentence * with nogil: # <<<<<<<<<<<<<< * for i in range(sentence_len): @@ -4152,10 +3923,10 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_train_sentence_sg(CYT /* "gensim/models/word2vec_inner.pyx":343 * next_random = fast_sentence_sg_neg(negative, cum_table, cum_table_len, syn0, syn1neg, size, indexes[i], indexes[j], _alpha, work, next_random, word_locks) - * + * * return result # <<<<<<<<<<<<<< - * - * + * + * */ __Pyx_XDECREF(__pyx_r); __pyx_t_1 = __Pyx_PyInt_From_long(__pyx_v_result); if 
(unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 343; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -4165,8 +3936,8 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_train_sentence_sg(CYT goto __pyx_L0; /* "gensim/models/word2vec_inner.pyx":258 - * - * + * + * * def train_sentence_sg(model, sentence, alpha, _work): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative @@ -4193,8 +3964,8 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_train_sentence_sg(CYT } /* "gensim/models/word2vec_inner.pyx":346 - * - * + * + * * def train_batch_sg(model, sentences, alpha, sentence_indeces, _work): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative @@ -4298,11 +4069,11 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_batch_sg(CYTHO __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_work; __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_v__alpha; int __pyx_v_size; - int __pyx_v_codelens[0x2710]; - __pyx_t_5numpy_uint32_t __pyx_v_indexes[0x2710]; - __pyx_t_5numpy_uint32_t __pyx_v_reduced_windows[0x2710]; - int __pyx_v_sentence_len[0x3E8]; - int __pyx_v_sentence_indeces_c[0x3E8]; + int __pyx_v_codelens[100000]; + __pyx_t_5numpy_uint32_t __pyx_v_indexes[100000]; + __pyx_t_5numpy_uint32_t __pyx_v_reduced_windows[100000]; + int __pyx_v_sentence_len[1000]; + int __pyx_v_sentence_indeces_c[1000]; int __pyx_v_window; int __pyx_v_i; int __pyx_v_j; @@ -4312,8 +4083,8 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_batch_sg(CYTHO int __pyx_v_num_sentences; int __pyx_v_sent_idx; __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn1; - __pyx_t_5numpy_uint32_t *__pyx_v_points[0x2710]; - __pyx_t_5numpy_uint8_t *__pyx_v_codes[0x2710]; + __pyx_t_5numpy_uint32_t *__pyx_v_points[100000]; + __pyx_t_5numpy_uint8_t *__pyx_v_codes[100000]; __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn1neg; __pyx_t_5numpy_uint32_t 
*__pyx_v_cum_table; unsigned PY_LONG_LONG __pyx_v_cum_table_len; @@ -4358,7 +4129,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_batch_sg(CYTHO __Pyx_RefNannySetupContext("train_batch_sg", 0); /* "gensim/models/word2vec_inner.pyx":347 - * + * * def train_batch_sg(model, sentences, alpha, sentence_indeces, _work): * cdef int hs = model.hs # <<<<<<<<<<<<<< * cdef int negative = model.negative @@ -4375,7 +4146,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_batch_sg(CYTHO * cdef int hs = model.hs * cdef int negative = model.negative # <<<<<<<<<<<<<< * cdef int sample = (model.sample != 0) - * + * */ __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 348; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); @@ -4387,7 +4158,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_batch_sg(CYTHO * cdef int hs = model.hs * cdef int negative = model.negative * cdef int sample = (model.sample != 0) # <<<<<<<<<<<<<< - * + * * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) */ __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_sample); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 349; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -4400,7 +4171,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_batch_sg(CYTHO /* "gensim/models/word2vec_inner.pyx":351 * cdef int sample = (model.sample != 0) - * + * * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) # <<<<<<<<<<<<<< * cdef REAL_t *word_locks = (np.PyArray_DATA(model.syn0_lockf)) * cdef REAL_t *work @@ -4412,7 +4183,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_batch_sg(CYTHO __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; /* "gensim/models/word2vec_inner.pyx":352 - * + * * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) * cdef REAL_t *word_locks = 
(np.PyArray_DATA(model.syn0_lockf)) # <<<<<<<<<<<<<< * cdef REAL_t *work @@ -4429,7 +4200,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_batch_sg(CYTHO * cdef REAL_t *work * cdef REAL_t _alpha = alpha # <<<<<<<<<<<<<< * cdef int size = model.layer1_size - * + * */ __pyx_t_4 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_4 == (npy_float32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 354; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__alpha = __pyx_t_4; @@ -4438,7 +4209,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_batch_sg(CYTHO * cdef REAL_t *work * cdef REAL_t _alpha = alpha * cdef int size = model.layer1_size # <<<<<<<<<<<<<< - * + * * cdef int codelens[MAX_SENTENCE_LEN] */ __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -4451,7 +4222,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_batch_sg(CYTHO * cdef int sentence_len[MAX_NUM_SENTENCES] * cdef int sentence_indeces_c[MAX_NUM_SENTENCES] * cdef int window = model.window # <<<<<<<<<<<<<< - * + * * cdef int i, j, k, m */ __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 362; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -4461,7 +4232,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_batch_sg(CYTHO __pyx_v_window = __pyx_t_2; /* "gensim/models/word2vec_inner.pyx":365 - * + * * cdef int i, j, k, m * cdef long result = 0 # <<<<<<<<<<<<<< * cdef int num_sentences = 0 @@ -4474,7 +4245,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_batch_sg(CYTHO * cdef long result = 0 * cdef int num_sentences = 0 # <<<<<<<<<<<<<< * cdef int sent_idx = 0 - * + * */ __pyx_v_num_sentences = 0; @@ -4482,26 +4253,26 @@ static 
PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_batch_sg(CYTHO * cdef long result = 0 * cdef int num_sentences = 0 * cdef int sent_idx = 0 # <<<<<<<<<<<<<< - * + * * # For hierarchical softmax */ __pyx_v_sent_idx = 0; /* "gensim/models/word2vec_inner.pyx":381 * cdef unsigned long long next_random - * + * * if hs: # <<<<<<<<<<<<<< * syn1 = (np.PyArray_DATA(model.syn1)) - * + * */ __pyx_t_5 = (__pyx_v_hs != 0); if (__pyx_t_5) { /* "gensim/models/word2vec_inner.pyx":382 - * + * * if hs: * syn1 = (np.PyArray_DATA(model.syn1)) # <<<<<<<<<<<<<< - * + * * if negative: */ __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 382; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -4509,19 +4280,13 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_batch_sg(CYTHO if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 382; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_syn1 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - - /* "gensim/models/word2vec_inner.pyx":381 - * cdef unsigned long long next_random - * - * if hs: # <<<<<<<<<<<<<< - * syn1 = (np.PyArray_DATA(model.syn1)) - * - */ + goto __pyx_L3; } + __pyx_L3:; /* "gensim/models/word2vec_inner.pyx":384 * syn1 = (np.PyArray_DATA(model.syn1)) - * + * * if negative: # <<<<<<<<<<<<<< * syn1neg = (np.PyArray_DATA(model.syn1neg)) * cum_table = (np.PyArray_DATA(model.cum_table)) @@ -4530,7 +4295,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_batch_sg(CYTHO if (__pyx_t_5) { /* "gensim/models/word2vec_inner.pyx":385 - * + * * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) # <<<<<<<<<<<<<< * cum_table = (np.PyArray_DATA(model.cum_table)) @@ -4567,22 +4332,16 @@ static PyObject 
*__pyx_pf_6gensim_6models_14word2vec_inner_2train_batch_sg(CYTHO __pyx_t_6 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_6 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 387; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_cum_table_len = __pyx_t_6; - - /* "gensim/models/word2vec_inner.pyx":384 - * syn1 = (np.PyArray_DATA(model.syn1)) - * - * if negative: # <<<<<<<<<<<<<< - * syn1neg = (np.PyArray_DATA(model.syn1neg)) - * cum_table = (np.PyArray_DATA(model.cum_table)) - */ + goto __pyx_L4; } + __pyx_L4:; /* "gensim/models/word2vec_inner.pyx":388 * cum_table = (np.PyArray_DATA(model.cum_table)) * cum_table_len = len(model.cum_table) * if negative or sample: # <<<<<<<<<<<<<< * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) - * + * */ __pyx_t_7 = (__pyx_v_negative != 0); if (!__pyx_t_7) { @@ -4599,7 +4358,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_batch_sg(CYTHO * cum_table_len = len(model.cum_table) * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # <<<<<<<<<<<<<< - * + * * # convert Python structures to primitive types, so we can release the GIL */ __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 389; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -4628,21 +4387,15 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_batch_sg(CYTHO __pyx_t_9 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_8); if (unlikely((__pyx_t_9 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 389; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; __pyx_v_next_random = __pyx_t_9; - - /* "gensim/models/word2vec_inner.pyx":388 - * cum_table = (np.PyArray_DATA(model.cum_table)) - * cum_table_len = 
len(model.cum_table) - * if negative or sample: # <<<<<<<<<<<<<< - * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) - * - */ + goto __pyx_L5; } + __pyx_L5:; /* "gensim/models/word2vec_inner.pyx":392 - * + * * # convert Python structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) # <<<<<<<<<<<<<< - * + * * vlookup = model.vocab */ if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 392; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -4650,7 +4403,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_batch_sg(CYTHO /* "gensim/models/word2vec_inner.pyx":394 * work = np.PyArray_DATA(_work) - * + * * vlookup = model.vocab # <<<<<<<<<<<<<< * for sent_idx, idx in enumerate(range(len(sentence_indeces) - 1)): * idx1 = sentence_indeces[idx] @@ -4661,7 +4414,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_batch_sg(CYTHO __pyx_t_8 = 0; /* "gensim/models/word2vec_inner.pyx":395 - * + * * vlookup = model.vocab * for sent_idx, idx in enumerate(range(len(sentence_indeces) - 1)): # <<<<<<<<<<<<<< * idx1 = sentence_indeces[idx] @@ -4743,7 +4496,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_batch_sg(CYTHO * sentence = sentences[idx1:idx2] * sentence_indeces_c[sent_idx] = idx1 */ - __pyx_t_8 = __Pyx_PyInt_AddObjC(__pyx_v_idx, __pyx_int_1, 1, 0); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 397; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = PyNumber_Add(__pyx_v_idx, __pyx_int_1); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 397; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); __pyx_t_1 = PyObject_GetItem(__pyx_v_sentence_indeces, __pyx_t_8); if (unlikely(__pyx_t_1 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 397; __pyx_clineno = 
__LINE__; goto __pyx_L1_error;}; __Pyx_GOTREF(__pyx_t_1); @@ -4838,7 +4591,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_batch_sg(CYTHO * if word is None: * continue # leaving i unchanged/shortening sentence */ - __pyx_t_5 = (__Pyx_PySequence_ContainsTF(__pyx_v_token, __pyx_v_vlookup, Py_EQ)); if (unlikely(__pyx_t_5 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 402; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = (__Pyx_PySequence_Contains(__pyx_v_token, __pyx_v_vlookup, Py_EQ)); if (unlikely(__pyx_t_5 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 402; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if ((__pyx_t_5 != 0)) { __pyx_t_14 = PyObject_GetItem(__pyx_v_vlookup, __pyx_v_token); if (unlikely(__pyx_t_14 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 402; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __Pyx_GOTREF(__pyx_t_14); @@ -4870,14 +4623,6 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_batch_sg(CYTHO * continue */ goto __pyx_L10_continue; - - /* "gensim/models/word2vec_inner.pyx":403 - * for token in sentence: - * word = vlookup[token] if token in vlookup else None - * if word is None: # <<<<<<<<<<<<<< - * continue # leaving i unchanged/shortening sentence - * if sample and word.sample_int < random_int32(&next_random): - */ } /* "gensim/models/word2vec_inner.pyx":405 @@ -4914,14 +4659,6 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_batch_sg(CYTHO * if hs: */ goto __pyx_L10_continue; - - /* "gensim/models/word2vec_inner.pyx":405 - * if word is None: - * continue # leaving i unchanged/shortening sentence - * if sample and word.sample_int < random_int32(&next_random): # <<<<<<<<<<<<<< - * continue - * indexes[idx1 + i] = word.index - */ } /* "gensim/models/word2vec_inner.pyx":407 @@ -5013,15 +4750,9 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_batch_sg(CYTHO __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; (__pyx_v_points[__pyx_t_17]) = 
((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_15))); __Pyx_DECREF(__pyx_t_15); __pyx_t_15 = 0; - - /* "gensim/models/word2vec_inner.pyx":408 - * continue - * indexes[idx1 + i] = word.index - * if hs: # <<<<<<<<<<<<<< - * codelens[idx1 + i] = len(word.code) - * codes[idx1 + i] = np.PyArray_DATA(word.code) - */ + goto __pyx_L16; } + __pyx_L16:; /* "gensim/models/word2vec_inner.pyx":412 * codes[idx1 + i] = np.PyArray_DATA(word.code) @@ -5048,7 +4779,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_batch_sg(CYTHO * break # TODO: log warning, tally overflow? * sentence_len[sent_idx] = i */ - __pyx_t_7 = ((__pyx_v_i == 0x2710) != 0); + __pyx_t_7 = ((__pyx_v_i == 100000) != 0); if (__pyx_t_7) { /* "gensim/models/word2vec_inner.pyx":415 @@ -5056,17 +4787,9 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_batch_sg(CYTHO * if i == MAX_SENTENCE_LEN: * break # TODO: log warning, tally overflow? # <<<<<<<<<<<<<< * sentence_len[sent_idx] = i - * + * */ goto __pyx_L11_break; - - /* "gensim/models/word2vec_inner.pyx":414 - * result += 1 - * i += 1 - * if i == MAX_SENTENCE_LEN: # <<<<<<<<<<<<<< - * break # TODO: log warning, tally overflow? - * sentence_len[sent_idx] = i - */ } /* "gensim/models/word2vec_inner.pyx":401 @@ -5085,17 +4808,17 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_batch_sg(CYTHO * if i == MAX_SENTENCE_LEN: * break # TODO: log warning, tally overflow? 
* sentence_len[sent_idx] = i # <<<<<<<<<<<<<< - * + * * # single randint() call avoids a big thread-sync slowdown */ (__pyx_v_sentence_len[__pyx_v_sent_idx]) = __pyx_v_i; /* "gensim/models/word2vec_inner.pyx":419 - * + * * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(model.random.randint(0, window, sentence_len[sent_idx])): # <<<<<<<<<<<<<< * reduced_windows[idx1 + i] = item - * + * */ __pyx_t_11 = 0; __pyx_t_15 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 419; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -5186,7 +4909,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_batch_sg(CYTHO * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(model.random.randint(0, window, sentence_len[sent_idx])): * reduced_windows[idx1 + i] = item # <<<<<<<<<<<<<< - * + * * num_sentences += 1 */ __pyx_t_16 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_16 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 420; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -5200,26 +4923,26 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_batch_sg(CYTHO (__pyx_v_reduced_windows[__pyx_t_17]) = __pyx_t_16; /* "gensim/models/word2vec_inner.pyx":419 - * + * * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(model.random.randint(0, window, sentence_len[sent_idx])): # <<<<<<<<<<<<<< * reduced_windows[idx1 + i] = item - * + * */ } __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; /* "gensim/models/word2vec_inner.pyx":422 * reduced_windows[idx1 + i] = item - * + * * num_sentences += 1 # <<<<<<<<<<<<<< - * - * + * + * */ __pyx_v_num_sentences = (__pyx_v_num_sentences + 1); /* "gensim/models/word2vec_inner.pyx":395 - * + * * vlookup = model.vocab * for sent_idx, idx in enumerate(range(len(sentence_indeces) - 1)): # 
<<<<<<<<<<<<<< * idx1 = sentence_indeces[idx] @@ -5229,7 +4952,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_batch_sg(CYTHO __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; /* "gensim/models/word2vec_inner.pyx":426 - * + * * # release GIL & train on the sentences * with nogil: # <<<<<<<<<<<<<< * for sent_idx in range(num_sentences): @@ -5300,15 +5023,9 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_batch_sg(CYTHO * if k > sentence_len[sent_idx]: */ __pyx_v_j = 0; - - /* "gensim/models/word2vec_inner.pyx":431 - * for i in range(sentence_len[sent_idx]): - * j = i - window + reduced_windows[m + i] - * if j < 0: # <<<<<<<<<<<<<< - * j = 0 - * k = i + window + 1 - reduced_windows[m + i] - */ + goto __pyx_L27; } + __pyx_L27:; /* "gensim/models/word2vec_inner.pyx":433 * if j < 0: @@ -5337,15 +5054,9 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_batch_sg(CYTHO * if j == i: */ __pyx_v_k = (__pyx_v_sentence_len[__pyx_v_sent_idx]); - - /* "gensim/models/word2vec_inner.pyx":434 - * j = 0 - * k = i + window + 1 - reduced_windows[m + i] - * if k > sentence_len[sent_idx]: # <<<<<<<<<<<<<< - * k = sentence_len[sent_idx] - * for j in range(j, k): - */ + goto __pyx_L28; } + __pyx_L28:; /* "gensim/models/word2vec_inner.pyx":436 * if k > sentence_len[sent_idx]: @@ -5376,14 +5087,6 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_batch_sg(CYTHO * fast_sentence_sg_hs(points[m + i], codes[m + i], codelens[m + i], syn0, syn1, size, indexes[m + j], _alpha, work, word_locks) */ goto __pyx_L29_continue; - - /* "gensim/models/word2vec_inner.pyx":437 - * k = sentence_len[sent_idx] - * for j in range(j, k): - * if j == i: # <<<<<<<<<<<<<< - * continue - * if hs: - */ } /* "gensim/models/word2vec_inner.pyx":439 @@ -5404,22 +5107,16 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_batch_sg(CYTHO * next_random = fast_sentence_sg_neg(negative, cum_table, cum_table_len, syn0, syn1neg, size, indexes[m 
+ i], indexes[m + j], _alpha, work, next_random, word_locks) */ __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_sg_hs((__pyx_v_points[(__pyx_v_m + __pyx_v_i)]), (__pyx_v_codes[(__pyx_v_m + __pyx_v_i)]), (__pyx_v_codelens[(__pyx_v_m + __pyx_v_i)]), __pyx_v_syn0, __pyx_v_syn1, __pyx_v_size, (__pyx_v_indexes[(__pyx_v_m + __pyx_v_j)]), __pyx_v__alpha, __pyx_v_work, __pyx_v_word_locks); - - /* "gensim/models/word2vec_inner.pyx":439 - * if j == i: - * continue - * if hs: # <<<<<<<<<<<<<< - * fast_sentence_sg_hs(points[m + i], codes[m + i], codelens[m + i], syn0, syn1, size, indexes[m + j], _alpha, work, word_locks) - * if negative: - */ + goto __pyx_L32; } + __pyx_L32:; /* "gensim/models/word2vec_inner.pyx":441 * if hs: * fast_sentence_sg_hs(points[m + i], codes[m + i], codelens[m + i], syn0, syn1, size, indexes[m + j], _alpha, work, word_locks) * if negative: # <<<<<<<<<<<<<< * next_random = fast_sentence_sg_neg(negative, cum_table, cum_table_len, syn0, syn1neg, size, indexes[m + i], indexes[m + j], _alpha, work, next_random, word_locks) - * + * */ __pyx_t_7 = (__pyx_v_negative != 0); if (__pyx_t_7) { @@ -5428,19 +5125,13 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_batch_sg(CYTHO * fast_sentence_sg_hs(points[m + i], codes[m + i], codelens[m + i], syn0, syn1, size, indexes[m + j], _alpha, work, word_locks) * if negative: * next_random = fast_sentence_sg_neg(negative, cum_table, cum_table_len, syn0, syn1neg, size, indexes[m + i], indexes[m + j], _alpha, work, next_random, word_locks) # <<<<<<<<<<<<<< - * + * * return result */ __pyx_v_next_random = __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_sg_neg(__pyx_v_negative, __pyx_v_cum_table, __pyx_v_cum_table_len, __pyx_v_syn0, __pyx_v_syn1neg, __pyx_v_size, (__pyx_v_indexes[(__pyx_v_m + __pyx_v_i)]), (__pyx_v_indexes[(__pyx_v_m + __pyx_v_j)]), __pyx_v__alpha, __pyx_v_work, __pyx_v_next_random, __pyx_v_word_locks); - - /* "gensim/models/word2vec_inner.pyx":441 - * if hs: - * 
fast_sentence_sg_hs(points[m + i], codes[m + i], codelens[m + i], syn0, syn1, size, indexes[m + j], _alpha, work, word_locks) - * if negative: # <<<<<<<<<<<<<< - * next_random = fast_sentence_sg_neg(negative, cum_table, cum_table_len, syn0, syn1neg, size, indexes[m + i], indexes[m + j], _alpha, work, next_random, word_locks) - * - */ + goto __pyx_L33; } + __pyx_L33:; __pyx_L29_continue:; } } @@ -5448,7 +5139,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_batch_sg(CYTHO } /* "gensim/models/word2vec_inner.pyx":426 - * + * * # release GIL & train on the sentences * with nogil: # <<<<<<<<<<<<<< * for sent_idx in range(num_sentences): @@ -5467,10 +5158,10 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_batch_sg(CYTHO /* "gensim/models/word2vec_inner.pyx":444 * next_random = fast_sentence_sg_neg(negative, cum_table, cum_table_len, syn0, syn1neg, size, indexes[m + i], indexes[m + j], _alpha, work, next_random, word_locks) - * + * * return result # <<<<<<<<<<<<<< - * - * + * + * */ __Pyx_XDECREF(__pyx_r); __pyx_t_3 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 444; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -5480,8 +5171,8 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_batch_sg(CYTHO goto __pyx_L0; /* "gensim/models/word2vec_inner.pyx":346 - * - * + * + * * def train_batch_sg(model, sentences, alpha, sentence_indeces, _work): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative @@ -5513,8 +5204,8 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_batch_sg(CYTHO } /* "gensim/models/word2vec_inner.pyx":447 - * - * + * + * * def train_sentence_cbow(model, sentence, alpha, _work, _neu1): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative @@ -5620,9 +5311,9 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4train_sentence_cbow( 
__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_neu1; __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_v__alpha; int __pyx_v_size; - int __pyx_v_codelens[0x2710]; - __pyx_t_5numpy_uint32_t __pyx_v_indexes[0x2710]; - __pyx_t_5numpy_uint32_t __pyx_v_reduced_windows[0x2710]; + int __pyx_v_codelens[100000]; + __pyx_t_5numpy_uint32_t __pyx_v_indexes[100000]; + __pyx_t_5numpy_uint32_t __pyx_v_reduced_windows[100000]; int __pyx_v_sentence_len; int __pyx_v_window; int __pyx_v_i; @@ -5630,8 +5321,8 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4train_sentence_cbow( int __pyx_v_k; long __pyx_v_result; __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn1; - __pyx_t_5numpy_uint32_t *__pyx_v_points[0x2710]; - __pyx_t_5numpy_uint8_t *__pyx_v_codes[0x2710]; + __pyx_t_5numpy_uint32_t *__pyx_v_points[100000]; + __pyx_t_5numpy_uint8_t *__pyx_v_codes[100000]; __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn1neg; __pyx_t_5numpy_uint32_t *__pyx_v_cum_table; unsigned PY_LONG_LONG __pyx_v_cum_table_len; @@ -5664,7 +5355,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4train_sentence_cbow( __Pyx_RefNannySetupContext("train_sentence_cbow", 0); /* "gensim/models/word2vec_inner.pyx":448 - * + * * def train_sentence_cbow(model, sentence, alpha, _work, _neu1): * cdef int hs = model.hs # <<<<<<<<<<<<<< * cdef int negative = model.negative @@ -5694,7 +5385,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4train_sentence_cbow( * cdef int negative = model.negative * cdef int sample = (model.sample != 0) # <<<<<<<<<<<<<< * cdef int cbow_mean = model.cbow_mean - * + * */ __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_sample); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 450; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); @@ -5708,7 +5399,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4train_sentence_cbow( * cdef int negative = 
model.negative * cdef int sample = (model.sample != 0) * cdef int cbow_mean = model.cbow_mean # <<<<<<<<<<<<<< - * + * * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) */ __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cbow_mean); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 451; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -5719,7 +5410,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4train_sentence_cbow( /* "gensim/models/word2vec_inner.pyx":453 * cdef int cbow_mean = model.cbow_mean - * + * * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) # <<<<<<<<<<<<<< * cdef REAL_t *word_locks = (np.PyArray_DATA(model.syn0_lockf)) * cdef REAL_t *work @@ -5731,7 +5422,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4train_sentence_cbow( __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; /* "gensim/models/word2vec_inner.pyx":454 - * + * * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) * cdef REAL_t *word_locks = (np.PyArray_DATA(model.syn0_lockf)) # <<<<<<<<<<<<<< * cdef REAL_t *work @@ -5748,7 +5439,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4train_sentence_cbow( * cdef REAL_t *neu1 * cdef REAL_t _alpha = alpha # <<<<<<<<<<<<<< * cdef int size = model.layer1_size - * + * */ __pyx_t_4 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_4 == (npy_float32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 457; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__alpha = __pyx_t_4; @@ -5757,7 +5448,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4train_sentence_cbow( * cdef REAL_t *neu1 * cdef REAL_t _alpha = alpha * cdef int size = model.layer1_size # <<<<<<<<<<<<<< - * + * * cdef int codelens[MAX_SENTENCE_LEN] */ __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 458; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -5770,7 +5461,7 @@ 
static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4train_sentence_cbow( * cdef np.uint32_t reduced_windows[MAX_SENTENCE_LEN] * cdef int sentence_len * cdef int window = model.window # <<<<<<<<<<<<<< - * + * * cdef int i, j, k */ __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 464; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -5780,29 +5471,29 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4train_sentence_cbow( __pyx_v_window = __pyx_t_2; /* "gensim/models/word2vec_inner.pyx":467 - * + * * cdef int i, j, k * cdef long result = 0 # <<<<<<<<<<<<<< - * + * * # For hierarchical softmax */ __pyx_v_result = 0; /* "gensim/models/word2vec_inner.pyx":481 * cdef unsigned long long next_random - * + * * if hs: # <<<<<<<<<<<<<< * syn1 = (np.PyArray_DATA(model.syn1)) - * + * */ __pyx_t_5 = (__pyx_v_hs != 0); if (__pyx_t_5) { /* "gensim/models/word2vec_inner.pyx":482 - * + * * if hs: * syn1 = (np.PyArray_DATA(model.syn1)) # <<<<<<<<<<<<<< - * + * * if negative: */ __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 482; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -5810,19 +5501,13 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4train_sentence_cbow( if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 482; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_syn1 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - - /* "gensim/models/word2vec_inner.pyx":481 - * cdef unsigned long long next_random - * - * if hs: # <<<<<<<<<<<<<< - * syn1 = (np.PyArray_DATA(model.syn1)) - * - */ + goto __pyx_L3; } + __pyx_L3:; /* "gensim/models/word2vec_inner.pyx":484 * syn1 = 
(np.PyArray_DATA(model.syn1)) - * + * * if negative: # <<<<<<<<<<<<<< * syn1neg = (np.PyArray_DATA(model.syn1neg)) * cum_table = (np.PyArray_DATA(model.cum_table)) @@ -5831,7 +5516,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4train_sentence_cbow( if (__pyx_t_5) { /* "gensim/models/word2vec_inner.pyx":485 - * + * * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) # <<<<<<<<<<<<<< * cum_table = (np.PyArray_DATA(model.cum_table)) @@ -5868,22 +5553,16 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4train_sentence_cbow( __pyx_t_6 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_6 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 487; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_cum_table_len = __pyx_t_6; - - /* "gensim/models/word2vec_inner.pyx":484 - * syn1 = (np.PyArray_DATA(model.syn1)) - * - * if negative: # <<<<<<<<<<<<<< - * syn1neg = (np.PyArray_DATA(model.syn1neg)) - * cum_table = (np.PyArray_DATA(model.cum_table)) - */ + goto __pyx_L4; } + __pyx_L4:; /* "gensim/models/word2vec_inner.pyx":488 * cum_table = (np.PyArray_DATA(model.cum_table)) * cum_table_len = len(model.cum_table) * if negative or sample: # <<<<<<<<<<<<<< * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) - * + * */ __pyx_t_7 = (__pyx_v_negative != 0); if (!__pyx_t_7) { @@ -5900,7 +5579,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4train_sentence_cbow( * cum_table_len = len(model.cum_table) * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # <<<<<<<<<<<<<< - * + * * # convert Python structures to primitive types, so we can release the GIL */ __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 489; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -5929,22 +5608,16 @@ static PyObject 
*__pyx_pf_6gensim_6models_14word2vec_inner_4train_sentence_cbow( __pyx_t_9 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_8); if (unlikely((__pyx_t_9 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 489; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; __pyx_v_next_random = __pyx_t_9; - - /* "gensim/models/word2vec_inner.pyx":488 - * cum_table = (np.PyArray_DATA(model.cum_table)) - * cum_table_len = len(model.cum_table) - * if negative or sample: # <<<<<<<<<<<<<< - * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) - * - */ + goto __pyx_L5; } + __pyx_L5:; /* "gensim/models/word2vec_inner.pyx":492 - * + * * # convert Python structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) # <<<<<<<<<<<<<< * neu1 = np.PyArray_DATA(_neu1) - * + * */ if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 492; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_work = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__work))); @@ -5953,7 +5626,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4train_sentence_cbow( * # convert Python structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) * neu1 = np.PyArray_DATA(_neu1) # <<<<<<<<<<<<<< - * + * * vlookup = model.vocab */ if (!(likely(((__pyx_v__neu1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__neu1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 493; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -5961,7 +5634,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4train_sentence_cbow( /* "gensim/models/word2vec_inner.pyx":495 * neu1 = np.PyArray_DATA(_neu1) - * + * * vlookup = model.vocab # <<<<<<<<<<<<<< * i = 0 * for token in sentence: 
@@ -5972,7 +5645,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4train_sentence_cbow( __pyx_t_8 = 0; /* "gensim/models/word2vec_inner.pyx":496 - * + * * vlookup = model.vocab * i = 0 # <<<<<<<<<<<<<< * for token in sentence: @@ -6036,7 +5709,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4train_sentence_cbow( * if word is None: * continue # leaving i unchanged/shortening sentence */ - __pyx_t_5 = (__Pyx_PySequence_ContainsTF(__pyx_v_token, __pyx_v_vlookup, Py_EQ)); if (unlikely(__pyx_t_5 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 498; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = (__Pyx_PySequence_Contains(__pyx_v_token, __pyx_v_vlookup, Py_EQ)); if (unlikely(__pyx_t_5 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 498; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if ((__pyx_t_5 != 0)) { __pyx_t_1 = PyObject_GetItem(__pyx_v_vlookup, __pyx_v_token); if (unlikely(__pyx_t_1 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 498; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __Pyx_GOTREF(__pyx_t_1); @@ -6068,14 +5741,6 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4train_sentence_cbow( * continue */ goto __pyx_L8_continue; - - /* "gensim/models/word2vec_inner.pyx":499 - * for token in sentence: - * word = vlookup[token] if token in vlookup else None - * if word is None: # <<<<<<<<<<<<<< - * continue # leaving i unchanged/shortening sentence - * if sample and word.sample_int < random_int32(&next_random): - */ } /* "gensim/models/word2vec_inner.pyx":501 @@ -6112,14 +5777,6 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4train_sentence_cbow( * if hs: */ goto __pyx_L8_continue; - - /* "gensim/models/word2vec_inner.pyx":501 - * if word is None: - * continue # leaving i unchanged/shortening sentence - * if sample and word.sample_int < random_int32(&next_random): # <<<<<<<<<<<<<< - * continue - * indexes[i] = word.index - */ } /* "gensim/models/word2vec_inner.pyx":503 @@ 
-6183,15 +5840,9 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4train_sentence_cbow( if (!(likely(((__pyx_t_11) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_11, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 507; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_points[__pyx_v_i]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_11))); __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; - - /* "gensim/models/word2vec_inner.pyx":504 - * continue - * indexes[i] = word.index - * if hs: # <<<<<<<<<<<<<< - * codelens[i] = len(word.code) - * codes[i] = np.PyArray_DATA(word.code) - */ + goto __pyx_L14; } + __pyx_L14:; /* "gensim/models/word2vec_inner.pyx":508 * codes[i] = np.PyArray_DATA(word.code) @@ -6218,7 +5869,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4train_sentence_cbow( * break # TODO: log warning, tally overflow? * sentence_len = i */ - __pyx_t_7 = ((__pyx_v_i == 0x2710) != 0); + __pyx_t_7 = ((__pyx_v_i == 100000) != 0); if (__pyx_t_7) { /* "gensim/models/word2vec_inner.pyx":511 @@ -6226,17 +5877,9 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4train_sentence_cbow( * if i == MAX_SENTENCE_LEN: * break # TODO: log warning, tally overflow? # <<<<<<<<<<<<<< * sentence_len = i - * + * */ goto __pyx_L9_break; - - /* "gensim/models/word2vec_inner.pyx":510 - * result += 1 - * i += 1 - * if i == MAX_SENTENCE_LEN: # <<<<<<<<<<<<<< - * break # TODO: log warning, tally overflow? - * sentence_len = i - */ } /* "gensim/models/word2vec_inner.pyx":497 @@ -6255,17 +5898,17 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4train_sentence_cbow( * if i == MAX_SENTENCE_LEN: * break # TODO: log warning, tally overflow? 
* sentence_len = i # <<<<<<<<<<<<<< - * + * * # single randint() call avoids a big thread-sync slowdown */ __pyx_v_sentence_len = __pyx_v_i; /* "gensim/models/word2vec_inner.pyx":515 - * + * * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(model.random.randint(0, window, sentence_len)): # <<<<<<<<<<<<<< * reduced_windows[i] = item - * + * */ __pyx_t_2 = 0; __pyx_t_11 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 515; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -6356,24 +5999,24 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4train_sentence_cbow( * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(model.random.randint(0, window, sentence_len)): * reduced_windows[i] = item # <<<<<<<<<<<<<< - * + * * # release GIL & train on the sentence */ __pyx_t_12 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_12 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 516; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_reduced_windows[__pyx_v_i]) = __pyx_t_12; /* "gensim/models/word2vec_inner.pyx":515 - * + * * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(model.random.randint(0, window, sentence_len)): # <<<<<<<<<<<<<< * reduced_windows[i] = item - * + * */ } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; /* "gensim/models/word2vec_inner.pyx":519 - * + * * # release GIL & train on the sentence * with nogil: # <<<<<<<<<<<<<< * for i in range(sentence_len): @@ -6424,15 +6067,9 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4train_sentence_cbow( * if k > sentence_len: */ __pyx_v_j = 0; - - /* "gensim/models/word2vec_inner.pyx":522 - * for i in range(sentence_len): - * j = i - window + reduced_windows[i] - * if j < 0: # <<<<<<<<<<<<<< - * j = 0 - * k = i + window + 1 - reduced_windows[i] - */ + goto 
__pyx_L23; } + __pyx_L23:; /* "gensim/models/word2vec_inner.pyx":524 * if j < 0: @@ -6461,15 +6098,9 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4train_sentence_cbow( * fast_sentence_cbow_hs(points[i], codes[i], codelens, neu1, syn0, syn1, size, indexes, _alpha, work, i, j, k, cbow_mean, word_locks) */ __pyx_v_k = __pyx_v_sentence_len; - - /* "gensim/models/word2vec_inner.pyx":525 - * j = 0 - * k = i + window + 1 - reduced_windows[i] - * if k > sentence_len: # <<<<<<<<<<<<<< - * k = sentence_len - * if hs: - */ + goto __pyx_L24; } + __pyx_L24:; /* "gensim/models/word2vec_inner.pyx":527 * if k > sentence_len: @@ -6489,22 +6120,16 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4train_sentence_cbow( * next_random = fast_sentence_cbow_neg(negative, cum_table, cum_table_len, codelens, neu1, syn0, syn1neg, size, indexes, _alpha, work, i, j, k, cbow_mean, next_random, word_locks) */ __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), __pyx_v_codelens, __pyx_v_neu1, __pyx_v_syn0, __pyx_v_syn1, __pyx_v_size, __pyx_v_indexes, __pyx_v__alpha, __pyx_v_work, __pyx_v_i, __pyx_v_j, __pyx_v_k, __pyx_v_cbow_mean, __pyx_v_word_locks); - - /* "gensim/models/word2vec_inner.pyx":527 - * if k > sentence_len: - * k = sentence_len - * if hs: # <<<<<<<<<<<<<< - * fast_sentence_cbow_hs(points[i], codes[i], codelens, neu1, syn0, syn1, size, indexes, _alpha, work, i, j, k, cbow_mean, word_locks) - * if negative: - */ + goto __pyx_L25; } + __pyx_L25:; /* "gensim/models/word2vec_inner.pyx":529 * if hs: * fast_sentence_cbow_hs(points[i], codes[i], codelens, neu1, syn0, syn1, size, indexes, _alpha, work, i, j, k, cbow_mean, word_locks) * if negative: # <<<<<<<<<<<<<< * next_random = fast_sentence_cbow_neg(negative, cum_table, cum_table_len, codelens, neu1, syn0, syn1neg, size, indexes, _alpha, work, i, j, k, cbow_mean, next_random, word_locks) - * + * */ __pyx_t_7 = (__pyx_v_negative != 0); if 
(__pyx_t_7) { @@ -6513,24 +6138,18 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4train_sentence_cbow( * fast_sentence_cbow_hs(points[i], codes[i], codelens, neu1, syn0, syn1, size, indexes, _alpha, work, i, j, k, cbow_mean, word_locks) * if negative: * next_random = fast_sentence_cbow_neg(negative, cum_table, cum_table_len, codelens, neu1, syn0, syn1neg, size, indexes, _alpha, work, i, j, k, cbow_mean, next_random, word_locks) # <<<<<<<<<<<<<< - * + * * return result */ __pyx_v_next_random = __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_cbow_neg(__pyx_v_negative, __pyx_v_cum_table, __pyx_v_cum_table_len, __pyx_v_codelens, __pyx_v_neu1, __pyx_v_syn0, __pyx_v_syn1neg, __pyx_v_size, __pyx_v_indexes, __pyx_v__alpha, __pyx_v_work, __pyx_v_i, __pyx_v_j, __pyx_v_k, __pyx_v_cbow_mean, __pyx_v_next_random, __pyx_v_word_locks); - - /* "gensim/models/word2vec_inner.pyx":529 - * if hs: - * fast_sentence_cbow_hs(points[i], codes[i], codelens, neu1, syn0, syn1, size, indexes, _alpha, work, i, j, k, cbow_mean, word_locks) - * if negative: # <<<<<<<<<<<<<< - * next_random = fast_sentence_cbow_neg(negative, cum_table, cum_table_len, codelens, neu1, syn0, syn1neg, size, indexes, _alpha, work, i, j, k, cbow_mean, next_random, word_locks) - * - */ + goto __pyx_L26; } + __pyx_L26:; } } /* "gensim/models/word2vec_inner.pyx":519 - * + * * # release GIL & train on the sentence * with nogil: # <<<<<<<<<<<<<< * for i in range(sentence_len): @@ -6549,10 +6168,10 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4train_sentence_cbow( /* "gensim/models/word2vec_inner.pyx":532 * next_random = fast_sentence_cbow_neg(negative, cum_table, cum_table_len, codelens, neu1, syn0, syn1neg, size, indexes, _alpha, work, i, j, k, cbow_mean, next_random, word_locks) - * + * * return result # <<<<<<<<<<<<<< - * - * + * + * */ __Pyx_XDECREF(__pyx_r); __pyx_t_1 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 
532; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -6562,8 +6181,8 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4train_sentence_cbow( goto __pyx_L0; /* "gensim/models/word2vec_inner.pyx":447 - * - * + * + * * def train_sentence_cbow(model, sentence, alpha, _work, _neu1): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative @@ -6590,10 +6209,10 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4train_sentence_cbow( } /* "gensim/models/word2vec_inner.pyx":536 - * + * * # Score is only implemented for hierarchical softmax * def score_sentence_sg(model, sentence, _work): # <<<<<<<<<<<<<< - * + * * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) */ @@ -6672,8 +6291,8 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_6score_sentence_sg(CY __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0; __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_work; int __pyx_v_size; - int __pyx_v_codelens[0x2710]; - __pyx_t_5numpy_uint32_t __pyx_v_indexes[0x2710]; + int __pyx_v_codelens[100000]; + __pyx_t_5numpy_uint32_t __pyx_v_indexes[100000]; int __pyx_v_sentence_len; int __pyx_v_window; int __pyx_v_i; @@ -6681,8 +6300,8 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_6score_sentence_sg(CY int __pyx_v_k; long __pyx_v_result; __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn1; - __pyx_t_5numpy_uint32_t *__pyx_v_points[0x2710]; - __pyx_t_5numpy_uint8_t *__pyx_v_codes[0x2710]; + __pyx_t_5numpy_uint32_t *__pyx_v_points[100000]; + __pyx_t_5numpy_uint8_t *__pyx_v_codes[100000]; PyObject *__pyx_v_vlookup = NULL; PyObject *__pyx_v_token = NULL; PyObject *__pyx_v_word = NULL; @@ -6708,7 +6327,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_6score_sentence_sg(CY /* "gensim/models/word2vec_inner.pyx":538 * def score_sentence_sg(model, sentence, _work): - * + * * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) # <<<<<<<<<<<<<< * cdef REAL_t *work * cdef int size = 
model.layer1_size @@ -6723,7 +6342,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_6score_sentence_sg(CY * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) * cdef REAL_t *work * cdef int size = model.layer1_size # <<<<<<<<<<<<<< - * + * * cdef int codelens[MAX_SENTENCE_LEN] */ __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 540; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -6736,7 +6355,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_6score_sentence_sg(CY * cdef np.uint32_t indexes[MAX_SENTENCE_LEN] * cdef int sentence_len * cdef int window = model.window # <<<<<<<<<<<<<< - * + * * cdef int i, j, k */ __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 545; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -6746,19 +6365,19 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_6score_sentence_sg(CY __pyx_v_window = __pyx_t_2; /* "gensim/models/word2vec_inner.pyx":548 - * + * * cdef int i, j, k * cdef long result = 0 # <<<<<<<<<<<<<< - * + * * cdef REAL_t *syn1 */ __pyx_v_result = 0; /* "gensim/models/word2vec_inner.pyx":554 * cdef np.uint8_t *codes[MAX_SENTENCE_LEN] - * + * * syn1 = (np.PyArray_DATA(model.syn1)) # <<<<<<<<<<<<<< - * + * * # convert Python structures to primitive types, so we can release the GIL */ __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 554; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -6768,10 +6387,10 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_6score_sentence_sg(CY __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; /* "gensim/models/word2vec_inner.pyx":557 - * + * * # convert Python structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) # <<<<<<<<<<<<<< - * + * * 
vlookup = model.vocab */ if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 557; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -6779,7 +6398,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_6score_sentence_sg(CY /* "gensim/models/word2vec_inner.pyx":559 * work = np.PyArray_DATA(_work) - * + * * vlookup = model.vocab # <<<<<<<<<<<<<< * i = 0 * for token in sentence: @@ -6790,7 +6409,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_6score_sentence_sg(CY __pyx_t_1 = 0; /* "gensim/models/word2vec_inner.pyx":560 - * + * * vlookup = model.vocab * i = 0 # <<<<<<<<<<<<<< * for token in sentence: @@ -6854,7 +6473,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_6score_sentence_sg(CY * if word is None: * continue # should drop the */ - __pyx_t_6 = (__Pyx_PySequence_ContainsTF(__pyx_v_token, __pyx_v_vlookup, Py_EQ)); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 562; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = (__Pyx_PySequence_Contains(__pyx_v_token, __pyx_v_vlookup, Py_EQ)); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 562; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if ((__pyx_t_6 != 0)) { __pyx_t_7 = PyObject_GetItem(__pyx_v_vlookup, __pyx_v_token); if (unlikely(__pyx_t_7 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 562; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __Pyx_GOTREF(__pyx_t_7); @@ -6886,14 +6505,6 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_6score_sentence_sg(CY * codelens[i] = len(word.code) */ goto __pyx_L3_continue; - - /* "gensim/models/word2vec_inner.pyx":563 - * for token in sentence: - * word = vlookup[token] if token in vlookup else None - * if word is None: # <<<<<<<<<<<<<< - * continue # should drop the - * indexes[i] = word.index - */ } /* "gensim/models/word2vec_inner.pyx":565 @@ 
-6973,7 +6584,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_6score_sentence_sg(CY * break # TODO: log warning, tally overflow? * sentence_len = i */ - __pyx_t_8 = ((__pyx_v_i == 0x2710) != 0); + __pyx_t_8 = ((__pyx_v_i == 100000) != 0); if (__pyx_t_8) { /* "gensim/models/word2vec_inner.pyx":572 @@ -6981,17 +6592,9 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_6score_sentence_sg(CY * if i == MAX_SENTENCE_LEN: * break # TODO: log warning, tally overflow? # <<<<<<<<<<<<<< * sentence_len = i - * + * */ goto __pyx_L4_break; - - /* "gensim/models/word2vec_inner.pyx":571 - * result += 1 - * i += 1 - * if i == MAX_SENTENCE_LEN: # <<<<<<<<<<<<<< - * break # TODO: log warning, tally overflow? - * sentence_len = i - */ } /* "gensim/models/word2vec_inner.pyx":561 @@ -7010,23 +6613,23 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_6score_sentence_sg(CY * if i == MAX_SENTENCE_LEN: * break # TODO: log warning, tally overflow? * sentence_len = i # <<<<<<<<<<<<<< - * + * * # release GIL & train on the sentence */ __pyx_v_sentence_len = __pyx_v_i; /* "gensim/models/word2vec_inner.pyx":576 - * + * * # release GIL & train on the sentence * work[0] = 0.0 # <<<<<<<<<<<<<< - * + * * with nogil: */ (__pyx_v_work[0]) = 0.0; /* "gensim/models/word2vec_inner.pyx":578 * work[0] = 0.0 - * + * * with nogil: # <<<<<<<<<<<<<< * for i in range(sentence_len): * if codelens[i] == 0: @@ -7039,7 +6642,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_6score_sentence_sg(CY /*try:*/ { /* "gensim/models/word2vec_inner.pyx":579 - * + * * with nogil: * for i in range(sentence_len): # <<<<<<<<<<<<<< * if codelens[i] == 0: @@ -7067,14 +6670,6 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_6score_sentence_sg(CY * if j < 0: */ goto __pyx_L10_continue; - - /* "gensim/models/word2vec_inner.pyx":580 - * with nogil: - * for i in range(sentence_len): - * if codelens[i] == 0: # <<<<<<<<<<<<<< - * continue - * j = i - window - */ } /* 
"gensim/models/word2vec_inner.pyx":582 @@ -7104,15 +6699,9 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_6score_sentence_sg(CY * if k > sentence_len: */ __pyx_v_j = 0; - - /* "gensim/models/word2vec_inner.pyx":583 - * continue - * j = i - window - * if j < 0: # <<<<<<<<<<<<<< - * j = 0 - * k = i + window + 1 - */ + goto __pyx_L13; } + __pyx_L13:; /* "gensim/models/word2vec_inner.pyx":585 * if j < 0: @@ -7141,15 +6730,9 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_6score_sentence_sg(CY * if j == i or codelens[j] == 0: */ __pyx_v_k = __pyx_v_sentence_len; - - /* "gensim/models/word2vec_inner.pyx":586 - * j = 0 - * k = i + window + 1 - * if k > sentence_len: # <<<<<<<<<<<<<< - * k = sentence_len - * for j in range(j, k): - */ + goto __pyx_L14; } + __pyx_L14:; /* "gensim/models/word2vec_inner.pyx":588 * if k > sentence_len: @@ -7185,24 +6768,16 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_6score_sentence_sg(CY * if j == i or codelens[j] == 0: * continue # <<<<<<<<<<<<<< * score_pair_sg_hs(points[i], codes[i], codelens[i], syn0, syn1, size, indexes[j], work) - * + * */ goto __pyx_L15_continue; - - /* "gensim/models/word2vec_inner.pyx":589 - * k = sentence_len - * for j in range(j, k): - * if j == i or codelens[j] == 0: # <<<<<<<<<<<<<< - * continue - * score_pair_sg_hs(points[i], codes[i], codelens[i], syn0, syn1, size, indexes[j], work) - */ } /* "gensim/models/word2vec_inner.pyx":591 * if j == i or codelens[j] == 0: * continue * score_pair_sg_hs(points[i], codes[i], codelens[i], syn0, syn1, size, indexes[j], work) # <<<<<<<<<<<<<< - * + * * return work[0] */ __pyx_f_6gensim_6models_14word2vec_inner_score_pair_sg_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), (__pyx_v_codelens[__pyx_v_i]), __pyx_v_syn0, __pyx_v_syn1, __pyx_v_size, (__pyx_v_indexes[__pyx_v_j]), __pyx_v_work); @@ -7214,7 +6789,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_6score_sentence_sg(CY /* 
"gensim/models/word2vec_inner.pyx":578 * work[0] = 0.0 - * + * * with nogil: # <<<<<<<<<<<<<< * for i in range(sentence_len): * if codelens[i] == 0: @@ -7232,9 +6807,9 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_6score_sentence_sg(CY /* "gensim/models/word2vec_inner.pyx":593 * score_pair_sg_hs(points[i], codes[i], codelens[i], syn0, syn1, size, indexes[j], work) - * + * * return work[0] # <<<<<<<<<<<<<< - * + * * cdef void score_pair_sg_hs( */ __Pyx_XDECREF(__pyx_r); @@ -7245,10 +6820,10 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_6score_sentence_sg(CY goto __pyx_L0; /* "gensim/models/word2vec_inner.pyx":536 - * + * * # Score is only implemented for hierarchical softmax * def score_sentence_sg(model, sentence, _work): # <<<<<<<<<<<<<< - * + * * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) */ @@ -7270,7 +6845,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_6score_sentence_sg(CY /* "gensim/models/word2vec_inner.pyx":595 * return work[0] - * + * * cdef void score_pair_sg_hs( # <<<<<<<<<<<<<< * const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen, * REAL_t *syn0, REAL_t *syn1, const int size, @@ -7289,17 +6864,17 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_score_pair_sg_hs(__pyx_t_5n long __pyx_t_5; /* "gensim/models/word2vec_inner.pyx":601 - * + * * cdef long long b * cdef long long row1 = word2_index * size, row2, sgn # <<<<<<<<<<<<<< * cdef REAL_t f - * + * */ __pyx_v_row1 = (__pyx_v_word2_index * __pyx_v_size); /* "gensim/models/word2vec_inner.pyx":604 * cdef REAL_t f - * + * * for b in range(codelen): # <<<<<<<<<<<<<< * row2 = word_point[b] * size * f = our_dot(&size, &syn0[row1], &ONE, &syn1[row2], &ONE) @@ -7309,7 +6884,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_score_pair_sg_hs(__pyx_t_5n __pyx_v_b = __pyx_t_2; /* "gensim/models/word2vec_inner.pyx":605 - * + * * for b in range(codelen): * row2 = word_point[b] * size # <<<<<<<<<<<<<< * f = our_dot(&size, 
&syn0[row1], &ONE, &syn1[row2], &ONE) @@ -7370,14 +6945,6 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_score_pair_sg_hs(__pyx_t_5n * work[0] += f */ goto __pyx_L3_continue; - - /* "gensim/models/word2vec_inner.pyx":609 - * sgn = (-1)**word_code[b] # ch function: 0-> 1, 1 -> -1 - * f = sgn*f - * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< - * continue - * f = LOG_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - */ } /* "gensim/models/word2vec_inner.pyx":611 @@ -7385,7 +6952,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_score_pair_sg_hs(__pyx_t_5n * continue * f = LOG_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< * work[0] += f - * + * */ __pyx_v_f = (__pyx_v_6gensim_6models_14word2vec_inner_LOG_TABLE[((int)((__pyx_v_f + 6.0) * 83.0))]); @@ -7393,7 +6960,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_score_pair_sg_hs(__pyx_t_5n * continue * f = LOG_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * work[0] += f # <<<<<<<<<<<<<< - * + * * def score_sentence_cbow(model, sentence, _work, _neu1): */ __pyx_t_5 = 0; @@ -7403,7 +6970,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_score_pair_sg_hs(__pyx_t_5n /* "gensim/models/word2vec_inner.pyx":595 * return work[0] - * + * * cdef void score_pair_sg_hs( # <<<<<<<<<<<<<< * const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen, * REAL_t *syn0, REAL_t *syn1, const int size, @@ -7414,9 +6981,9 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_score_pair_sg_hs(__pyx_t_5n /* "gensim/models/word2vec_inner.pyx":614 * work[0] += f - * + * * def score_sentence_cbow(model, sentence, _work, _neu1): # <<<<<<<<<<<<<< - * + * * cdef int cbow_mean = model.cbow_mean */ @@ -7506,8 +7073,8 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_8score_sentence_cbow( __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_work; __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_neu1; int __pyx_v_size; - int 
__pyx_v_codelens[0x2710]; - __pyx_t_5numpy_uint32_t __pyx_v_indexes[0x2710]; + int __pyx_v_codelens[100000]; + __pyx_t_5numpy_uint32_t __pyx_v_indexes[100000]; int __pyx_v_sentence_len; int __pyx_v_window; int __pyx_v_i; @@ -7515,8 +7082,8 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_8score_sentence_cbow( int __pyx_v_k; long __pyx_v_result; __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn1; - __pyx_t_5numpy_uint32_t *__pyx_v_points[0x2710]; - __pyx_t_5numpy_uint8_t *__pyx_v_codes[0x2710]; + __pyx_t_5numpy_uint32_t *__pyx_v_points[100000]; + __pyx_t_5numpy_uint8_t *__pyx_v_codes[100000]; PyObject *__pyx_v_vlookup = NULL; PyObject *__pyx_v_token = NULL; PyObject *__pyx_v_word = NULL; @@ -7540,9 +7107,9 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_8score_sentence_cbow( /* "gensim/models/word2vec_inner.pyx":616 * def score_sentence_cbow(model, sentence, _work, _neu1): - * + * * cdef int cbow_mean = model.cbow_mean # <<<<<<<<<<<<<< - * + * * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) */ __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cbow_mean); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 616; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -7553,7 +7120,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_8score_sentence_cbow( /* "gensim/models/word2vec_inner.pyx":618 * cdef int cbow_mean = model.cbow_mean - * + * * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) # <<<<<<<<<<<<<< * cdef REAL_t *work * cdef REAL_t *neu1 @@ -7568,7 +7135,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_8score_sentence_cbow( * cdef REAL_t *work * cdef REAL_t *neu1 * cdef int size = model.layer1_size # <<<<<<<<<<<<<< - * + * * cdef int codelens[MAX_SENTENCE_LEN] */ __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 621; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ 
-7581,7 +7148,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_8score_sentence_cbow( * cdef np.uint32_t indexes[MAX_SENTENCE_LEN] * cdef int sentence_len * cdef int window = model.window # <<<<<<<<<<<<<< - * + * * cdef int i, j, k */ __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 626; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -7591,19 +7158,19 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_8score_sentence_cbow( __pyx_v_window = __pyx_t_2; /* "gensim/models/word2vec_inner.pyx":629 - * + * * cdef int i, j, k * cdef long result = 0 # <<<<<<<<<<<<<< - * + * * # For hierarchical softmax */ __pyx_v_result = 0; /* "gensim/models/word2vec_inner.pyx":636 * cdef np.uint8_t *codes[MAX_SENTENCE_LEN] - * + * * syn1 = (np.PyArray_DATA(model.syn1)) # <<<<<<<<<<<<<< - * + * * # convert Python structures to primitive types, so we can release the GIL */ __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 636; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -7613,11 +7180,11 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_8score_sentence_cbow( __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; /* "gensim/models/word2vec_inner.pyx":639 - * + * * # convert Python structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) # <<<<<<<<<<<<<< * neu1 = np.PyArray_DATA(_neu1) - * + * */ if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 639; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_work = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__work))); @@ -7626,7 +7193,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_8score_sentence_cbow( * # convert Python 
structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) * neu1 = np.PyArray_DATA(_neu1) # <<<<<<<<<<<<<< - * + * * vlookup = model.vocab */ if (!(likely(((__pyx_v__neu1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__neu1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 640; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -7634,7 +7201,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_8score_sentence_cbow( /* "gensim/models/word2vec_inner.pyx":642 * neu1 = np.PyArray_DATA(_neu1) - * + * * vlookup = model.vocab # <<<<<<<<<<<<<< * i = 0 * for token in sentence: @@ -7645,7 +7212,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_8score_sentence_cbow( __pyx_t_1 = 0; /* "gensim/models/word2vec_inner.pyx":643 - * + * * vlookup = model.vocab * i = 0 # <<<<<<<<<<<<<< * for token in sentence: @@ -7709,7 +7276,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_8score_sentence_cbow( * if word is None: * continue # for score, should this be a default negative value? 
*/ - __pyx_t_6 = (__Pyx_PySequence_ContainsTF(__pyx_v_token, __pyx_v_vlookup, Py_EQ)); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 645; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = (__Pyx_PySequence_Contains(__pyx_v_token, __pyx_v_vlookup, Py_EQ)); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 645; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if ((__pyx_t_6 != 0)) { __pyx_t_7 = PyObject_GetItem(__pyx_v_vlookup, __pyx_v_token); if (unlikely(__pyx_t_7 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 645; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __Pyx_GOTREF(__pyx_t_7); @@ -7741,15 +7308,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_8score_sentence_cbow( * codelens[i] = len(word.code) */ goto __pyx_L3_continue; - - /* "gensim/models/word2vec_inner.pyx":646 - * for token in sentence: - * word = vlookup[token] if token in vlookup else None - * if word is None: # <<<<<<<<<<<<<< - * continue # for score, should this be a default negative value? - * indexes[i] = word.index - */ - } + } /* "gensim/models/word2vec_inner.pyx":648 * if word is None: @@ -7828,7 +7387,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_8score_sentence_cbow( * break # TODO: log warning, tally overflow? * sentence_len = i */ - __pyx_t_8 = ((__pyx_v_i == 0x2710) != 0); + __pyx_t_8 = ((__pyx_v_i == 100000) != 0); if (__pyx_t_8) { /* "gensim/models/word2vec_inner.pyx":655 @@ -7836,17 +7395,9 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_8score_sentence_cbow( * if i == MAX_SENTENCE_LEN: * break # TODO: log warning, tally overflow? # <<<<<<<<<<<<<< * sentence_len = i - * + * */ goto __pyx_L4_break; - - /* "gensim/models/word2vec_inner.pyx":654 - * result += 1 - * i += 1 - * if i == MAX_SENTENCE_LEN: # <<<<<<<<<<<<<< - * break # TODO: log warning, tally overflow? 
- * sentence_len = i - */ } /* "gensim/models/word2vec_inner.pyx":644 @@ -7865,13 +7416,13 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_8score_sentence_cbow( * if i == MAX_SENTENCE_LEN: * break # TODO: log warning, tally overflow? * sentence_len = i # <<<<<<<<<<<<<< - * + * * # release GIL & train on the sentence */ __pyx_v_sentence_len = __pyx_v_i; /* "gensim/models/word2vec_inner.pyx":659 - * + * * # release GIL & train on the sentence * work[0] = 0.0 # <<<<<<<<<<<<<< * with nogil: @@ -7922,14 +7473,6 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_8score_sentence_cbow( * if j < 0: */ goto __pyx_L10_continue; - - /* "gensim/models/word2vec_inner.pyx":662 - * with nogil: - * for i in range(sentence_len): - * if codelens[i] == 0: # <<<<<<<<<<<<<< - * continue - * j = i - window - */ } /* "gensim/models/word2vec_inner.pyx":664 @@ -7959,15 +7502,9 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_8score_sentence_cbow( * if k > sentence_len: */ __pyx_v_j = 0; - - /* "gensim/models/word2vec_inner.pyx":665 - * continue - * j = i - window - * if j < 0: # <<<<<<<<<<<<<< - * j = 0 - * k = i + window + 1 - */ + goto __pyx_L13; } + __pyx_L13:; /* "gensim/models/word2vec_inner.pyx":667 * if j < 0: @@ -7993,24 +7530,18 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_8score_sentence_cbow( * if k > sentence_len: * k = sentence_len # <<<<<<<<<<<<<< * score_pair_cbow_hs(points[i], codes[i], codelens, neu1, syn0, syn1, size, indexes, work, i, j, k, cbow_mean) - * + * */ __pyx_v_k = __pyx_v_sentence_len; - - /* "gensim/models/word2vec_inner.pyx":668 - * j = 0 - * k = i + window + 1 - * if k > sentence_len: # <<<<<<<<<<<<<< - * k = sentence_len - * score_pair_cbow_hs(points[i], codes[i], codelens, neu1, syn0, syn1, size, indexes, work, i, j, k, cbow_mean) - */ + goto __pyx_L14; } + __pyx_L14:; /* "gensim/models/word2vec_inner.pyx":670 * if k > sentence_len: * k = sentence_len * score_pair_cbow_hs(points[i], codes[i], codelens, 
neu1, syn0, syn1, size, indexes, work, i, j, k, cbow_mean) # <<<<<<<<<<<<<< - * + * * return work[0] */ __pyx_f_6gensim_6models_14word2vec_inner_score_pair_cbow_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), __pyx_v_codelens, __pyx_v_neu1, __pyx_v_syn0, __pyx_v_syn1, __pyx_v_size, __pyx_v_indexes, __pyx_v_work, __pyx_v_i, __pyx_v_j, __pyx_v_k, __pyx_v_cbow_mean); @@ -8038,9 +7569,9 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_8score_sentence_cbow( /* "gensim/models/word2vec_inner.pyx":672 * score_pair_cbow_hs(points[i], codes[i], codelens, neu1, syn0, syn1, size, indexes, work, i, j, k, cbow_mean) - * + * * return work[0] # <<<<<<<<<<<<<< - * + * * cdef void score_pair_cbow_hs( */ __Pyx_XDECREF(__pyx_r); @@ -8052,9 +7583,9 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_8score_sentence_cbow( /* "gensim/models/word2vec_inner.pyx":614 * work[0] += f - * + * * def score_sentence_cbow(model, sentence, _work, _neu1): # <<<<<<<<<<<<<< - * + * * cdef int cbow_mean = model.cbow_mean */ @@ -8076,7 +7607,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_8score_sentence_cbow( /* "gensim/models/word2vec_inner.pyx":674 * return work[0] - * + * * cdef void score_pair_cbow_hs( # <<<<<<<<<<<<<< * const np.uint32_t *word_point, const np.uint8_t *word_code, int codelens[MAX_SENTENCE_LEN], * REAL_t *neu1, REAL_t *syn0, REAL_t *syn1, const int size, @@ -8099,7 +7630,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_score_pair_cbow_hs(__pyx_t_ /* "gensim/models/word2vec_inner.pyx":685 * cdef int m - * + * * memset(neu1, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< * count = 0.0 * for m in range(j, k): @@ -8107,7 +7638,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_score_pair_cbow_hs(__pyx_t_ memset(__pyx_v_neu1, 0, (__pyx_v_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); /* "gensim/models/word2vec_inner.pyx":686 - * + * * memset(neu1, 0, size * cython.sizeof(REAL_t)) * count = 0.0 # 
<<<<<<<<<<<<<< * for m in range(j, k): @@ -8152,24 +7683,16 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_score_pair_cbow_hs(__pyx_t_ * count += ONEF */ goto __pyx_L3_continue; - - /* "gensim/models/word2vec_inner.pyx":688 - * count = 0.0 - * for m in range(j, k): - * if m == i or codelens[m] == 0: # <<<<<<<<<<<<<< - * continue - * else: - */ } + /*else*/ { - /* "gensim/models/word2vec_inner.pyx":691 + /* "gensim/models/word2vec_inner.pyx":691 * continue * else: * count += ONEF # <<<<<<<<<<<<<< * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) * if count > (0.5): */ - /*else*/ { __pyx_v_count = (__pyx_v_count + __pyx_v_6gensim_6models_14word2vec_inner_ONEF); /* "gensim/models/word2vec_inner.pyx":692 @@ -8202,22 +7725,16 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_score_pair_cbow_hs(__pyx_t_ * sscal(&size, &inv_count, neu1, &ONE) */ __pyx_v_inv_count = (__pyx_v_6gensim_6models_14word2vec_inner_ONEF / __pyx_v_count); - - /* "gensim/models/word2vec_inner.pyx":693 - * count += ONEF - * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) - * if count > (0.5): # <<<<<<<<<<<<<< - * inv_count = ONEF/count - * if cbow_mean: - */ + goto __pyx_L8; } + __pyx_L8:; /* "gensim/models/word2vec_inner.pyx":695 * if count > (0.5): * inv_count = ONEF/count * if cbow_mean: # <<<<<<<<<<<<<< * sscal(&size, &inv_count, neu1, &ONE) - * + * */ __pyx_t_3 = (__pyx_v_cbow_mean != 0); if (__pyx_t_3) { @@ -8226,23 +7743,17 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_score_pair_cbow_hs(__pyx_t_ * inv_count = ONEF/count * if cbow_mean: * sscal(&size, &inv_count, neu1, &ONE) # <<<<<<<<<<<<<< - * + * * for b in range(codelens[i]): */ __pyx_v_6gensim_6models_14word2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_inv_count), __pyx_v_neu1, (&__pyx_v_6gensim_6models_14word2vec_inner_ONE)); - - /* "gensim/models/word2vec_inner.pyx":695 - * if count > (0.5): - * inv_count = ONEF/count - * if cbow_mean: # <<<<<<<<<<<<<< - * sscal(&size, 
&inv_count, neu1, &ONE) - * - */ + goto __pyx_L9; } + __pyx_L9:; /* "gensim/models/word2vec_inner.pyx":698 * sscal(&size, &inv_count, neu1, &ONE) - * + * * for b in range(codelens[i]): # <<<<<<<<<<<<<< * row2 = word_point[b] * size * f = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) @@ -8252,7 +7763,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_score_pair_cbow_hs(__pyx_t_ __pyx_v_b = __pyx_t_5; /* "gensim/models/word2vec_inner.pyx":699 - * + * * for b in range(codelens[i]): * row2 = word_point[b] * size # <<<<<<<<<<<<<< * f = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) @@ -8313,14 +7824,6 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_score_pair_cbow_hs(__pyx_t_ * work[0] += f */ goto __pyx_L10_continue; - - /* "gensim/models/word2vec_inner.pyx":703 - * sgn = (-1)**word_code[b] # ch function: 0-> 1, 1 -> -1 - * f = sgn*f - * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< - * continue - * f = LOG_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - */ } /* "gensim/models/word2vec_inner.pyx":705 @@ -8328,7 +7831,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_score_pair_cbow_hs(__pyx_t_ * continue * f = LOG_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< * work[0] += f - * + * */ __pyx_v_f = (__pyx_v_6gensim_6models_14word2vec_inner_LOG_TABLE[((int)((__pyx_v_f + 6.0) * 83.0))]); @@ -8336,8 +7839,8 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_score_pair_cbow_hs(__pyx_t_ * continue * f = LOG_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * work[0] += f # <<<<<<<<<<<<<< - * - * + * + * */ __pyx_t_6 = 0; (__pyx_v_work[__pyx_t_6]) = ((__pyx_v_work[__pyx_t_6]) + __pyx_v_f); @@ -8346,7 +7849,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_score_pair_cbow_hs(__pyx_t_ /* "gensim/models/word2vec_inner.pyx":674 * return work[0] - * + * * cdef void score_pair_cbow_hs( # <<<<<<<<<<<<<< * const np.uint32_t *word_point, const np.uint8_t *word_code, int codelens[MAX_SENTENCE_LEN], * 
REAL_t *neu1, REAL_t *syn0, REAL_t *syn1, const int size, @@ -8356,8 +7859,8 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_score_pair_cbow_hs(__pyx_t_ } /* "gensim/models/word2vec_inner.pyx":709 - * - * + * + * * def init(): # <<<<<<<<<<<<<< * """ * Precompute function `sigmoid(x) = 1 / (1 + exp(-x))`, for x values discretized @@ -8395,7 +7898,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_10init(CYTHON_UNUSED __Pyx_RefNannySetupContext("init", 0); /* "gensim/models/word2vec_inner.pyx":719 - * + * * cdef int i * cdef float *x = [10.0] # <<<<<<<<<<<<<< * cdef float *y = [0.01] @@ -8433,13 +7936,13 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_10init(CYTHON_UNUSED __pyx_v_size = 1; /* "gensim/models/word2vec_inner.pyx":727 - * + * * # build the sigmoid table * for i in range(EXP_TABLE_SIZE): # <<<<<<<<<<<<<< * EXP_TABLE[i] = exp((i / EXP_TABLE_SIZE * 2 - 1) * MAX_EXP) * EXP_TABLE[i] = (EXP_TABLE[i] / (EXP_TABLE[i] + 1)) */ - for (__pyx_t_3 = 0; __pyx_t_3 < 0x3E8; __pyx_t_3+=1) { + for (__pyx_t_3 = 0; __pyx_t_3 < 1000; __pyx_t_3+=1) { __pyx_v_i = __pyx_t_3; /* "gensim/models/word2vec_inner.pyx":728 @@ -8449,14 +7952,14 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_10init(CYTHON_UNUSED * EXP_TABLE[i] = (EXP_TABLE[i] / (EXP_TABLE[i] + 1)) * LOG_TABLE[i] = log( EXP_TABLE[i] ) */ - (__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)exp(((((__pyx_v_i / ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)0x3E8)) * 2.0) - 1.0) * 6.0))); + (__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)exp(((((__pyx_v_i / ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)1000)) * 2.0) - 1.0) * 6.0))); /* "gensim/models/word2vec_inner.pyx":729 * for i in range(EXP_TABLE_SIZE): * EXP_TABLE[i] = exp((i / EXP_TABLE_SIZE * 2 - 1) * MAX_EXP) * EXP_TABLE[i] = (EXP_TABLE[i] / (EXP_TABLE[i] + 1)) # <<<<<<<<<<<<<< 
* LOG_TABLE[i] = log( EXP_TABLE[i] ) - * + * */ (__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)((__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) / ((__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) + 1.0))); @@ -8464,14 +7967,14 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_10init(CYTHON_UNUSED * EXP_TABLE[i] = exp((i / EXP_TABLE_SIZE * 2 - 1) * MAX_EXP) * EXP_TABLE[i] = (EXP_TABLE[i] / (EXP_TABLE[i] + 1)) * LOG_TABLE[i] = log( EXP_TABLE[i] ) # <<<<<<<<<<<<<< - * + * * # check whether sdot returns double or float */ (__pyx_v_6gensim_6models_14word2vec_inner_LOG_TABLE[__pyx_v_i]) = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)log((__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]))); } /* "gensim/models/word2vec_inner.pyx":733 - * + * * # check whether sdot returns double or float * d_res = dsdot(&size, x, &ONE, y, &ONE) # <<<<<<<<<<<<<< * p_res = &d_res @@ -8527,14 +8030,6 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_10init(CYTHON_UNUSED __Pyx_INCREF(__pyx_int_0); __pyx_r = __pyx_int_0; goto __pyx_L0; - - /* "gensim/models/word2vec_inner.pyx":735 - * d_res = dsdot(&size, x, &ONE, y, &ONE) - * p_res = &d_res - * if (abs(d_res - expected) < 0.0001): # <<<<<<<<<<<<<< - * our_dot = our_dot_double - * our_saxpy = saxpy - */ } /* "gensim/models/word2vec_inner.pyx":739 @@ -8576,24 +8071,16 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_10init(CYTHON_UNUSED __Pyx_INCREF(__pyx_int_1); __pyx_r = __pyx_int_1; goto __pyx_L0; - - /* "gensim/models/word2vec_inner.pyx":739 - * our_saxpy = saxpy - * return 0 # double - * elif (abs(p_res[0] - expected) < 0.0001): # <<<<<<<<<<<<<< - * our_dot = our_dot_float - * our_saxpy = saxpy - */ } + /*else*/ { - /* "gensim/models/word2vec_inner.pyx":746 + /* "gensim/models/word2vec_inner.pyx":746 * # neither => use cython loops, no BLAS * # actually, the BLAS is so messed up we'll 
probably have segfaulted above and never even reach here * our_dot = our_dot_noblas # <<<<<<<<<<<<<< * our_saxpy = our_saxpy_noblas * return 2 */ - /*else*/ { __pyx_v_6gensim_6models_14word2vec_inner_our_dot = __pyx_f_6gensim_6models_14word2vec_inner_our_dot_noblas; /* "gensim/models/word2vec_inner.pyx":747 @@ -8601,7 +8088,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_10init(CYTHON_UNUSED * our_dot = our_dot_noblas * our_saxpy = our_saxpy_noblas # <<<<<<<<<<<<<< * return 2 - * + * */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy = __pyx_f_6gensim_6models_14word2vec_inner_our_saxpy_noblas; @@ -8609,7 +8096,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_10init(CYTHON_UNUSED * our_dot = our_dot_noblas * our_saxpy = our_saxpy_noblas * return 2 # <<<<<<<<<<<<<< - * + * * FAST_VERSION = init() # initialize the module */ __Pyx_XDECREF(__pyx_r); @@ -8619,8 +8106,8 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_10init(CYTHON_UNUSED } /* "gensim/models/word2vec_inner.pyx":709 - * - * + * + * * def init(): # <<<<<<<<<<<<<< * """ * Precompute function `sigmoid(x) = 1 / (1 + exp(-x))`, for x values discretized @@ -8633,7 +8120,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_10init(CYTHON_UNUSED return __pyx_r; } -/* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":197 +/* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":197 * # experimental exception made for __getbuffer__ and __releasebuffer__ * # -- the details of this may change. 
* def __getbuffer__(ndarray self, Py_buffer* info, int flags): # <<<<<<<<<<<<<< @@ -8683,11 +8170,11 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __Pyx_GIVEREF(__pyx_v_info->obj); } - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":203 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":203 * # of flags - * + * * if info == NULL: return # <<<<<<<<<<<<<< - * + * * cdef int copy_shape, i, ndim */ __pyx_t_1 = ((__pyx_v_info == NULL) != 0); @@ -8696,36 +8183,36 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P goto __pyx_L0; } - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":206 - * + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":206 + * * cdef int copy_shape, i, ndim * cdef int endian_detector = 1 # <<<<<<<<<<<<<< * cdef bint little_endian = ((&endian_detector)[0] != 0) - * + * */ __pyx_v_endian_detector = 1; - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":207 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":207 * cdef int copy_shape, i, ndim * cdef int endian_detector = 1 * cdef bint little_endian = ((&endian_detector)[0] != 0) # <<<<<<<<<<<<<< - * + * * ndim = PyArray_NDIM(self) */ __pyx_v_little_endian = ((((char *)(&__pyx_v_endian_detector))[0]) != 0); - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":209 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":209 * cdef bint little_endian = 
((&endian_detector)[0] != 0) - * + * * ndim = PyArray_NDIM(self) # <<<<<<<<<<<<<< - * + * * if sizeof(npy_intp) != sizeof(Py_ssize_t): */ __pyx_v_ndim = PyArray_NDIM(__pyx_v_self); - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":211 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":211 * ndim = PyArray_NDIM(self) - * + * * if sizeof(npy_intp) != sizeof(Py_ssize_t): # <<<<<<<<<<<<<< * copy_shape = 1 * else: @@ -8733,40 +8220,32 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_t_1 = (((sizeof(npy_intp)) != (sizeof(Py_ssize_t))) != 0); if (__pyx_t_1) { - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":212 - * + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":212 + * * if sizeof(npy_intp) != sizeof(Py_ssize_t): * copy_shape = 1 # <<<<<<<<<<<<<< * else: * copy_shape = 0 */ __pyx_v_copy_shape = 1; - - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":211 - * ndim = PyArray_NDIM(self) - * - * if sizeof(npy_intp) != sizeof(Py_ssize_t): # <<<<<<<<<<<<<< - * copy_shape = 1 - * else: - */ goto __pyx_L4; } + /*else*/ { - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":214 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":214 * copy_shape = 1 * else: * copy_shape = 0 # <<<<<<<<<<<<<< - * + * * if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS) */ - /*else*/ { __pyx_v_copy_shape = 0; } __pyx_L4:; - /* 
"../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":216 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":216 * copy_shape = 0 - * + * * if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS) # <<<<<<<<<<<<<< * and not PyArray_CHKFLAGS(self, NPY_C_CONTIGUOUS)): * raise ValueError(u"ndarray is not C contiguous") @@ -8778,31 +8257,23 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P goto __pyx_L6_bool_binop_done; } - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":217 - * + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":217 + * * if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS) * and not PyArray_CHKFLAGS(self, NPY_C_CONTIGUOUS)): # <<<<<<<<<<<<<< * raise ValueError(u"ndarray is not C contiguous") - * + * */ __pyx_t_2 = ((!(PyArray_CHKFLAGS(__pyx_v_self, NPY_C_CONTIGUOUS) != 0)) != 0); __pyx_t_1 = __pyx_t_2; __pyx_L6_bool_binop_done:; - - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":216 - * copy_shape = 0 - * - * if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS) # <<<<<<<<<<<<<< - * and not PyArray_CHKFLAGS(self, NPY_C_CONTIGUOUS)): - * raise ValueError(u"ndarray is not C contiguous") - */ if (__pyx_t_1) { - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":218 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":218 * if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS) * and not PyArray_CHKFLAGS(self, NPY_C_CONTIGUOUS)): * raise ValueError(u"ndarray is not C 
contiguous") # <<<<<<<<<<<<<< - * + * * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) */ __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__7, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 218; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -8810,19 +8281,11 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __Pyx_Raise(__pyx_t_3, 0, 0, 0); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; {__pyx_filename = __pyx_f[1]; __pyx_lineno = 218; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":216 - * copy_shape = 0 - * - * if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS) # <<<<<<<<<<<<<< - * and not PyArray_CHKFLAGS(self, NPY_C_CONTIGUOUS)): - * raise ValueError(u"ndarray is not C contiguous") - */ } - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":220 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":220 * raise ValueError(u"ndarray is not C contiguous") - * + * * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) # <<<<<<<<<<<<<< * and not PyArray_CHKFLAGS(self, NPY_F_CONTIGUOUS)): * raise ValueError(u"ndarray is not Fortran contiguous") @@ -8834,31 +8297,23 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P goto __pyx_L9_bool_binop_done; } - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":221 - * + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":221 + * * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) * and not PyArray_CHKFLAGS(self, NPY_F_CONTIGUOUS)): # 
<<<<<<<<<<<<<< * raise ValueError(u"ndarray is not Fortran contiguous") - * + * */ __pyx_t_2 = ((!(PyArray_CHKFLAGS(__pyx_v_self, NPY_F_CONTIGUOUS) != 0)) != 0); __pyx_t_1 = __pyx_t_2; __pyx_L9_bool_binop_done:; - - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":220 - * raise ValueError(u"ndarray is not C contiguous") - * - * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) # <<<<<<<<<<<<<< - * and not PyArray_CHKFLAGS(self, NPY_F_CONTIGUOUS)): - * raise ValueError(u"ndarray is not Fortran contiguous") - */ if (__pyx_t_1) { - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":222 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":222 * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) * and not PyArray_CHKFLAGS(self, NPY_F_CONTIGUOUS)): * raise ValueError(u"ndarray is not Fortran contiguous") # <<<<<<<<<<<<<< - * + * * info.buf = PyArray_DATA(self) */ __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__8, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 222; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -8866,27 +8321,19 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __Pyx_Raise(__pyx_t_3, 0, 0, 0); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; {__pyx_filename = __pyx_f[1]; __pyx_lineno = 222; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":220 - * raise ValueError(u"ndarray is not C contiguous") - * - * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) # <<<<<<<<<<<<<< - * and not PyArray_CHKFLAGS(self, NPY_F_CONTIGUOUS)): - * raise ValueError(u"ndarray is not Fortran contiguous") 
- */ } - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":224 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":224 * raise ValueError(u"ndarray is not Fortran contiguous") - * + * * info.buf = PyArray_DATA(self) # <<<<<<<<<<<<<< * info.ndim = ndim * if copy_shape: */ __pyx_v_info->buf = PyArray_DATA(__pyx_v_self); - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":225 - * + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":225 + * * info.buf = PyArray_DATA(self) * info.ndim = ndim # <<<<<<<<<<<<<< * if copy_shape: @@ -8894,7 +8341,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_info->ndim = __pyx_v_ndim; - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":226 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":226 * info.buf = PyArray_DATA(self) * info.ndim = ndim * if copy_shape: # <<<<<<<<<<<<<< @@ -8904,7 +8351,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_t_1 = (__pyx_v_copy_shape != 0); if (__pyx_t_1) { - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":229 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":229 * # Allocate new buffer for strides and shape info. * # This is allocated as one block, strides first. 
* info.strides = stdlib.malloc(sizeof(Py_ssize_t) * ndim * 2) # <<<<<<<<<<<<<< @@ -8913,7 +8360,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_info->strides = ((Py_ssize_t *)malloc((((sizeof(Py_ssize_t)) * ((size_t)__pyx_v_ndim)) * 2))); - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":230 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":230 * # This is allocated as one block, strides first. * info.strides = stdlib.malloc(sizeof(Py_ssize_t) * ndim * 2) * info.shape = info.strides + ndim # <<<<<<<<<<<<<< @@ -8922,7 +8369,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_info->shape = (__pyx_v_info->strides + __pyx_v_ndim); - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":231 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":231 * info.strides = stdlib.malloc(sizeof(Py_ssize_t) * ndim * 2) * info.shape = info.strides + ndim * for i in range(ndim): # <<<<<<<<<<<<<< @@ -8933,7 +8380,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_4; __pyx_t_5+=1) { __pyx_v_i = __pyx_t_5; - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":232 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":232 * info.shape = info.strides + ndim * for i in range(ndim): * info.strides[i] = PyArray_STRIDES(self)[i] # <<<<<<<<<<<<<< @@ -8942,7 +8389,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ (__pyx_v_info->strides[__pyx_v_i]) = 
(PyArray_STRIDES(__pyx_v_self)[__pyx_v_i]); - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":233 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":233 * for i in range(ndim): * info.strides[i] = PyArray_STRIDES(self)[i] * info.shape[i] = PyArray_DIMS(self)[i] # <<<<<<<<<<<<<< @@ -8951,28 +8398,20 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ (__pyx_v_info->shape[__pyx_v_i]) = (PyArray_DIMS(__pyx_v_self)[__pyx_v_i]); } - - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":226 - * info.buf = PyArray_DATA(self) - * info.ndim = ndim - * if copy_shape: # <<<<<<<<<<<<<< - * # Allocate new buffer for strides and shape info. - * # This is allocated as one block, strides first. - */ goto __pyx_L11; } + /*else*/ { - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":235 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":235 * info.shape[i] = PyArray_DIMS(self)[i] * else: * info.strides = PyArray_STRIDES(self) # <<<<<<<<<<<<<< * info.shape = PyArray_DIMS(self) * info.suboffsets = NULL */ - /*else*/ { __pyx_v_info->strides = ((Py_ssize_t *)PyArray_STRIDES(__pyx_v_self)); - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":236 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":236 * else: * info.strides = PyArray_STRIDES(self) * info.shape = PyArray_DIMS(self) # <<<<<<<<<<<<<< @@ -8983,7 +8422,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P } __pyx_L11:; - /* 
"../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":237 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":237 * info.strides = PyArray_STRIDES(self) * info.shape = PyArray_DIMS(self) * info.suboffsets = NULL # <<<<<<<<<<<<<< @@ -8992,57 +8431,57 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_info->suboffsets = NULL; - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":238 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":238 * info.shape = PyArray_DIMS(self) * info.suboffsets = NULL * info.itemsize = PyArray_ITEMSIZE(self) # <<<<<<<<<<<<<< * info.readonly = not PyArray_ISWRITEABLE(self) - * + * */ __pyx_v_info->itemsize = PyArray_ITEMSIZE(__pyx_v_self); - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":239 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":239 * info.suboffsets = NULL * info.itemsize = PyArray_ITEMSIZE(self) * info.readonly = not PyArray_ISWRITEABLE(self) # <<<<<<<<<<<<<< - * + * * cdef int t */ __pyx_v_info->readonly = (!(PyArray_ISWRITEABLE(__pyx_v_self) != 0)); - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":242 - * + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":242 + * * cdef int t * cdef char* f = NULL # <<<<<<<<<<<<<< * cdef dtype descr = self.descr - * cdef int offset + * cdef list stack */ __pyx_v_f = NULL; - /* 
"../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":243 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":243 * cdef int t * cdef char* f = NULL * cdef dtype descr = self.descr # <<<<<<<<<<<<<< + * cdef list stack * cdef int offset - * */ __pyx_t_3 = ((PyObject *)__pyx_v_self->descr); __Pyx_INCREF(__pyx_t_3); __pyx_v_descr = ((PyArray_Descr *)__pyx_t_3); __pyx_t_3 = 0; - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":246 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":247 * cdef int offset - * + * * cdef bint hasfields = PyDataType_HASFIELDS(descr) # <<<<<<<<<<<<<< - * + * * if not hasfields and not copy_shape: */ __pyx_v_hasfields = PyDataType_HASFIELDS(__pyx_v_descr); - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":248 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":249 * cdef bint hasfields = PyDataType_HASFIELDS(descr) - * + * * if not hasfields and not copy_shape: # <<<<<<<<<<<<<< * # do not call releasebuffer * info.obj = None @@ -9058,7 +8497,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_L15_bool_binop_done:; if (__pyx_t_1) { - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":250 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":251 * if not hasfields and not copy_shape: * # do not call releasebuffer * info.obj = None # <<<<<<<<<<<<<< @@ -9070,25 +8509,17 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject 
*__pyx_v_self, P __Pyx_GOTREF(__pyx_v_info->obj); __Pyx_DECREF(__pyx_v_info->obj); __pyx_v_info->obj = Py_None; - - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":248 - * cdef bint hasfields = PyDataType_HASFIELDS(descr) - * - * if not hasfields and not copy_shape: # <<<<<<<<<<<<<< - * # do not call releasebuffer - * info.obj = None - */ goto __pyx_L14; } + /*else*/ { - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":253 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":254 * else: * # need to call releasebuffer * info.obj = self # <<<<<<<<<<<<<< - * + * * if not hasfields: */ - /*else*/ { __Pyx_INCREF(((PyObject *)__pyx_v_self)); __Pyx_GIVEREF(((PyObject *)__pyx_v_self)); __Pyx_GOTREF(__pyx_v_info->obj); @@ -9097,9 +8528,9 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P } __pyx_L14:; - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":255 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":256 * info.obj = self - * + * * if not hasfields: # <<<<<<<<<<<<<< * t = descr.type_num * if ((descr.byteorder == c'>' and little_endian) or @@ -9107,8 +8538,8 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_t_1 = ((!(__pyx_v_hasfields != 0)) != 0); if (__pyx_t_1) { - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":256 - * + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":257 + * * if not hasfields: * t = descr.type_num # <<<<<<<<<<<<<< * if ((descr.byteorder == c'>' and little_endian) or 
@@ -9117,7 +8548,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_t_4 = __pyx_v_descr->type_num; __pyx_v_t = __pyx_t_4; - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":257 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":258 * if not hasfields: * t = descr.type_num * if ((descr.byteorder == c'>' and little_endian) or # <<<<<<<<<<<<<< @@ -9137,7 +8568,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P } __pyx_L20_next_or:; - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":258 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":259 * t = descr.type_num * if ((descr.byteorder == c'>' and little_endian) or * (descr.byteorder == c'<' and not little_endian)): # <<<<<<<<<<<<<< @@ -9153,51 +8584,43 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_t_2 = ((!(__pyx_v_little_endian != 0)) != 0); __pyx_t_1 = __pyx_t_2; __pyx_L19_bool_binop_done:; - - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":257 - * if not hasfields: - * t = descr.type_num - * if ((descr.byteorder == c'>' and little_endian) or # <<<<<<<<<<<<<< - * (descr.byteorder == c'<' and not little_endian)): - * raise ValueError(u"Non-native byte order not supported") - */ if (__pyx_t_1) { - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":259 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":260 * if ((descr.byteorder == c'>' and little_endian) or * (descr.byteorder == c'<' and not 
little_endian)): * raise ValueError(u"Non-native byte order not supported") # <<<<<<<<<<<<<< * if t == NPY_BYTE: f = "b" * elif t == NPY_UBYTE: f = "B" */ - __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__9, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 259; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__9, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 260; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_Raise(__pyx_t_3, 0, 0, 0); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - {__pyx_filename = __pyx_f[1]; __pyx_lineno = 259; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[1]; __pyx_lineno = 260; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":257 - * if not hasfields: - * t = descr.type_num - * if ((descr.byteorder == c'>' and little_endian) or # <<<<<<<<<<<<<< - * (descr.byteorder == c'<' and not little_endian)): - * raise ValueError(u"Non-native byte order not supported") + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":277 + * elif t == NPY_CDOUBLE: f = "Zd" + * elif t == NPY_CLONGDOUBLE: f = "Zg" + * elif t == NPY_OBJECT: f = "O" # <<<<<<<<<<<<<< + * else: + * raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t) */ - } + switch (__pyx_v_t) { - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":260 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":261 * (descr.byteorder == c'<' and not little_endian)): * raise ValueError(u"Non-native byte order not supported") * if t == NPY_BYTE: f = "b" # 
<<<<<<<<<<<<<< * elif t == NPY_UBYTE: f = "B" * elif t == NPY_SHORT: f = "h" */ - switch (__pyx_v_t) { case NPY_BYTE: __pyx_v_f = __pyx_k_b; break; - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":261 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":262 * raise ValueError(u"Non-native byte order not supported") * if t == NPY_BYTE: f = "b" * elif t == NPY_UBYTE: f = "B" # <<<<<<<<<<<<<< @@ -9208,7 +8631,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = __pyx_k_B; break; - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":262 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":263 * if t == NPY_BYTE: f = "b" * elif t == NPY_UBYTE: f = "B" * elif t == NPY_SHORT: f = "h" # <<<<<<<<<<<<<< @@ -9219,7 +8642,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = __pyx_k_h; break; - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":263 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":264 * elif t == NPY_UBYTE: f = "B" * elif t == NPY_SHORT: f = "h" * elif t == NPY_USHORT: f = "H" # <<<<<<<<<<<<<< @@ -9230,7 +8653,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = __pyx_k_H; break; - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":264 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":265 * elif t == NPY_SHORT: f = "h" * elif t == NPY_USHORT: f = "H" * elif t 
== NPY_INT: f = "i" # <<<<<<<<<<<<<< @@ -9241,7 +8664,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = __pyx_k_i; break; - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":265 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":266 * elif t == NPY_USHORT: f = "H" * elif t == NPY_INT: f = "i" * elif t == NPY_UINT: f = "I" # <<<<<<<<<<<<<< @@ -9252,7 +8675,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = __pyx_k_I; break; - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":266 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":267 * elif t == NPY_INT: f = "i" * elif t == NPY_UINT: f = "I" * elif t == NPY_LONG: f = "l" # <<<<<<<<<<<<<< @@ -9263,7 +8686,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = __pyx_k_l; break; - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":267 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":268 * elif t == NPY_UINT: f = "I" * elif t == NPY_LONG: f = "l" * elif t == NPY_ULONG: f = "L" # <<<<<<<<<<<<<< @@ -9274,7 +8697,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = __pyx_k_L; break; - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":268 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":269 * elif t == NPY_LONG: f = "l" * elif t == NPY_ULONG: f = "L" * elif t == 
NPY_LONGLONG: f = "q" # <<<<<<<<<<<<<< @@ -9285,7 +8708,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = __pyx_k_q; break; - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":269 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":270 * elif t == NPY_ULONG: f = "L" * elif t == NPY_LONGLONG: f = "q" * elif t == NPY_ULONGLONG: f = "Q" # <<<<<<<<<<<<<< @@ -9296,7 +8719,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = __pyx_k_Q; break; - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":270 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":271 * elif t == NPY_LONGLONG: f = "q" * elif t == NPY_ULONGLONG: f = "Q" * elif t == NPY_FLOAT: f = "f" # <<<<<<<<<<<<<< @@ -9307,7 +8730,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = __pyx_k_f; break; - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":271 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":272 * elif t == NPY_ULONGLONG: f = "Q" * elif t == NPY_FLOAT: f = "f" * elif t == NPY_DOUBLE: f = "d" # <<<<<<<<<<<<<< @@ -9318,7 +8741,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = __pyx_k_d; break; - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":272 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":273 * elif t == NPY_FLOAT: f = "f" * elif t == 
NPY_DOUBLE: f = "d" * elif t == NPY_LONGDOUBLE: f = "g" # <<<<<<<<<<<<<< @@ -9329,7 +8752,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = __pyx_k_g; break; - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":273 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":274 * elif t == NPY_DOUBLE: f = "d" * elif t == NPY_LONGDOUBLE: f = "g" * elif t == NPY_CFLOAT: f = "Zf" # <<<<<<<<<<<<<< @@ -9340,7 +8763,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = __pyx_k_Zf; break; - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":274 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":275 * elif t == NPY_LONGDOUBLE: f = "g" * elif t == NPY_CFLOAT: f = "Zf" * elif t == NPY_CDOUBLE: f = "Zd" # <<<<<<<<<<<<<< @@ -9351,7 +8774,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = __pyx_k_Zd; break; - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":275 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":276 * elif t == NPY_CFLOAT: f = "Zf" * elif t == NPY_CDOUBLE: f = "Zd" * elif t == NPY_CLONGDOUBLE: f = "Zg" # <<<<<<<<<<<<<< @@ -9362,7 +8785,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = __pyx_k_Zg; break; - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":276 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":277 * 
elif t == NPY_CDOUBLE: f = "Zd" * elif t == NPY_CLONGDOUBLE: f = "Zg" * elif t == NPY_OBJECT: f = "O" # <<<<<<<<<<<<<< @@ -9374,33 +8797,33 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P break; default: - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":278 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":279 * elif t == NPY_OBJECT: f = "O" * else: * raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t) # <<<<<<<<<<<<<< * info.format = f * return */ - __pyx_t_3 = __Pyx_PyInt_From_int(__pyx_v_t); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __Pyx_PyInt_From_int(__pyx_v_t); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 279; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_6 = PyUnicode_Format(__pyx_kp_u_unknown_dtype_code_in_numpy_pxd, __pyx_t_3); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyUnicode_Format(__pyx_kp_u_unknown_dtype_code_in_numpy_pxd, __pyx_t_3); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 279; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 279; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_GIVEREF(__pyx_t_6); PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_t_3, NULL); if 
(unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_t_3, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 279; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_Raise(__pyx_t_6, 0, 0, 0); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - {__pyx_filename = __pyx_f[1]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[1]; __pyx_lineno = 279; __pyx_clineno = __LINE__; goto __pyx_L1_error;} break; } - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":279 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":280 * else: * raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t) * info.format = f # <<<<<<<<<<<<<< @@ -9409,7 +8832,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_info->format = __pyx_v_f; - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":280 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":281 * raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t) * info.format = f * return # <<<<<<<<<<<<<< @@ -9418,27 +8841,19 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_r = 0; goto __pyx_L0; - - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":255 - * info.obj = self - * - * if not hasfields: # <<<<<<<<<<<<<< - * t = descr.type_num - * if ((descr.byteorder == c'>' and little_endian) or - */ } + /*else*/ { - /* 
"../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":282 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":283 * return * else: * info.format = stdlib.malloc(_buffer_format_string_len) # <<<<<<<<<<<<<< * info.format[0] = c'^' # Native data types, manual alignment * offset = 0 */ - /*else*/ { - __pyx_v_info->format = ((char *)malloc(0xFF)); + __pyx_v_info->format = ((char *)malloc(255)); - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":283 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":284 * else: * info.format = stdlib.malloc(_buffer_format_string_len) * info.format[0] = c'^' # Native data types, manual alignment # <<<<<<<<<<<<<< @@ -9447,7 +8862,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ (__pyx_v_info->format[0]) = '^'; - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":284 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":285 * info.format = stdlib.malloc(_buffer_format_string_len) * info.format[0] = c'^' # Native data types, manual alignment * offset = 0 # <<<<<<<<<<<<<< @@ -9456,27 +8871,27 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_offset = 0; - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":285 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":286 * info.format[0] = c'^' # Native data types, manual alignment * offset = 0 * f = _util_dtypestring(descr, info.format + 1, # 
<<<<<<<<<<<<<< * info.format + _buffer_format_string_len, * &offset) */ - __pyx_t_7 = __pyx_f_5numpy__util_dtypestring(__pyx_v_descr, (__pyx_v_info->format + 1), (__pyx_v_info->format + 0xFF), (&__pyx_v_offset)); if (unlikely(__pyx_t_7 == NULL)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 285; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __pyx_f_5numpy__util_dtypestring(__pyx_v_descr, (__pyx_v_info->format + 1), (__pyx_v_info->format + 255), (&__pyx_v_offset)); if (unlikely(__pyx_t_7 == NULL)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 286; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_f = __pyx_t_7; - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":288 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":289 * info.format + _buffer_format_string_len, * &offset) * f[0] = c'\0' # Terminate format string # <<<<<<<<<<<<<< - * + * * def __releasebuffer__(ndarray self, Py_buffer* info): */ (__pyx_v_f[0]) = '\x00'; } - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":197 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":197 * # experimental exception made for __getbuffer__ and __releasebuffer__ * # -- the details of this may change. 
* def __getbuffer__(ndarray self, Py_buffer* info, int flags): # <<<<<<<<<<<<<< @@ -9508,9 +8923,9 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P return __pyx_r; } -/* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":290 +/* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":291 * f[0] = c'\0' # Terminate format string - * + * * def __releasebuffer__(ndarray self, Py_buffer* info): # <<<<<<<<<<<<<< * if PyArray_HASFIELDS(self): * stdlib.free(info.format) @@ -9532,8 +8947,8 @@ static void __pyx_pf_5numpy_7ndarray_2__releasebuffer__(PyArrayObject *__pyx_v_s int __pyx_t_1; __Pyx_RefNannySetupContext("__releasebuffer__", 0); - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":291 - * + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":292 + * * def __releasebuffer__(ndarray self, Py_buffer* info): * if PyArray_HASFIELDS(self): # <<<<<<<<<<<<<< * stdlib.free(info.format) @@ -9542,7 +8957,7 @@ static void __pyx_pf_5numpy_7ndarray_2__releasebuffer__(PyArrayObject *__pyx_v_s __pyx_t_1 = (PyArray_HASFIELDS(__pyx_v_self) != 0); if (__pyx_t_1) { - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":292 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":293 * def __releasebuffer__(ndarray self, Py_buffer* info): * if PyArray_HASFIELDS(self): * stdlib.free(info.format) # <<<<<<<<<<<<<< @@ -9550,17 +8965,11 @@ static void __pyx_pf_5numpy_7ndarray_2__releasebuffer__(PyArrayObject *__pyx_v_s * stdlib.free(info.strides) */ free(__pyx_v_info->format); - - /* 
"../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":291 - * - * def __releasebuffer__(ndarray self, Py_buffer* info): - * if PyArray_HASFIELDS(self): # <<<<<<<<<<<<<< - * stdlib.free(info.format) - * if sizeof(npy_intp) != sizeof(Py_ssize_t): - */ + goto __pyx_L3; } + __pyx_L3:; - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":293 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":294 * if PyArray_HASFIELDS(self): * stdlib.free(info.format) * if sizeof(npy_intp) != sizeof(Py_ssize_t): # <<<<<<<<<<<<<< @@ -9570,27 +8979,21 @@ static void __pyx_pf_5numpy_7ndarray_2__releasebuffer__(PyArrayObject *__pyx_v_s __pyx_t_1 = (((sizeof(npy_intp)) != (sizeof(Py_ssize_t))) != 0); if (__pyx_t_1) { - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":294 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":295 * stdlib.free(info.format) * if sizeof(npy_intp) != sizeof(Py_ssize_t): * stdlib.free(info.strides) # <<<<<<<<<<<<<< * # info.shape was stored after info.strides in the same block - * + * */ free(__pyx_v_info->strides); - - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":293 - * if PyArray_HASFIELDS(self): - * stdlib.free(info.format) - * if sizeof(npy_intp) != sizeof(Py_ssize_t): # <<<<<<<<<<<<<< - * stdlib.free(info.strides) - * # info.shape was stored after info.strides in the same block - */ + goto __pyx_L4; } + __pyx_L4:; - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":290 + /* 
"../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":291 * f[0] = c'\0' # Terminate format string - * + * * def __releasebuffer__(ndarray self, Py_buffer* info): # <<<<<<<<<<<<<< * if PyArray_HASFIELDS(self): * stdlib.free(info.format) @@ -9600,12 +9003,12 @@ static void __pyx_pf_5numpy_7ndarray_2__releasebuffer__(PyArrayObject *__pyx_v_s __Pyx_RefNannyFinishContext(); } -/* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":770 +/* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":771 * ctypedef npy_cdouble complex_t - * + * * cdef inline object PyArray_MultiIterNew1(a): # <<<<<<<<<<<<<< * return PyArray_MultiIterNew(1, a) - * + * */ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew1(PyObject *__pyx_v_a) { @@ -9617,26 +9020,26 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew1(PyObject *__ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("PyArray_MultiIterNew1", 0); - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":771 - * + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":772 + * * cdef inline object PyArray_MultiIterNew1(a): * return PyArray_MultiIterNew(1, a) # <<<<<<<<<<<<<< - * + * * cdef inline object PyArray_MultiIterNew2(a, b): */ __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyArray_MultiIterNew(1, ((void *)__pyx_v_a)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 771; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyArray_MultiIterNew(1, ((void *)__pyx_v_a)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 772; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __pyx_r = __pyx_t_1; __pyx_t_1 = 0; 
goto __pyx_L0; - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":770 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":771 * ctypedef npy_cdouble complex_t - * + * * cdef inline object PyArray_MultiIterNew1(a): # <<<<<<<<<<<<<< * return PyArray_MultiIterNew(1, a) - * + * */ /* function exit code */ @@ -9650,12 +9053,12 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew1(PyObject *__ return __pyx_r; } -/* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":773 +/* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":774 * return PyArray_MultiIterNew(1, a) - * + * * cdef inline object PyArray_MultiIterNew2(a, b): # <<<<<<<<<<<<<< * return PyArray_MultiIterNew(2, a, b) - * + * */ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew2(PyObject *__pyx_v_a, PyObject *__pyx_v_b) { @@ -9667,26 +9070,26 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew2(PyObject *__ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("PyArray_MultiIterNew2", 0); - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":774 - * + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":775 + * * cdef inline object PyArray_MultiIterNew2(a, b): * return PyArray_MultiIterNew(2, a, b) # <<<<<<<<<<<<<< - * + * * cdef inline object PyArray_MultiIterNew3(a, b, c): */ __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyArray_MultiIterNew(2, ((void *)__pyx_v_a), ((void *)__pyx_v_b)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 774; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyArray_MultiIterNew(2, ((void 
*)__pyx_v_a), ((void *)__pyx_v_b)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 775; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __pyx_r = __pyx_t_1; __pyx_t_1 = 0; goto __pyx_L0; - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":773 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":774 * return PyArray_MultiIterNew(1, a) - * + * * cdef inline object PyArray_MultiIterNew2(a, b): # <<<<<<<<<<<<<< * return PyArray_MultiIterNew(2, a, b) - * + * */ /* function exit code */ @@ -9700,12 +9103,12 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew2(PyObject *__ return __pyx_r; } -/* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":776 +/* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":777 * return PyArray_MultiIterNew(2, a, b) - * + * * cdef inline object PyArray_MultiIterNew3(a, b, c): # <<<<<<<<<<<<<< * return PyArray_MultiIterNew(3, a, b, c) - * + * */ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew3(PyObject *__pyx_v_a, PyObject *__pyx_v_b, PyObject *__pyx_v_c) { @@ -9717,26 +9120,26 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew3(PyObject *__ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("PyArray_MultiIterNew3", 0); - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":777 - * + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":778 + * * cdef inline object PyArray_MultiIterNew3(a, b, c): * return PyArray_MultiIterNew(3, a, b, c) # <<<<<<<<<<<<<< - * + * * cdef inline object PyArray_MultiIterNew4(a, b, c, d): */ 
__Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyArray_MultiIterNew(3, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 777; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyArray_MultiIterNew(3, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 778; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __pyx_r = __pyx_t_1; __pyx_t_1 = 0; goto __pyx_L0; - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":776 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":777 * return PyArray_MultiIterNew(2, a, b) - * + * * cdef inline object PyArray_MultiIterNew3(a, b, c): # <<<<<<<<<<<<<< * return PyArray_MultiIterNew(3, a, b, c) - * + * */ /* function exit code */ @@ -9750,12 +9153,12 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew3(PyObject *__ return __pyx_r; } -/* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":779 +/* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":780 * return PyArray_MultiIterNew(3, a, b, c) - * + * * cdef inline object PyArray_MultiIterNew4(a, b, c, d): # <<<<<<<<<<<<<< * return PyArray_MultiIterNew(4, a, b, c, d) - * + * */ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew4(PyObject *__pyx_v_a, PyObject *__pyx_v_b, PyObject *__pyx_v_c, PyObject *__pyx_v_d) { @@ -9767,26 +9170,26 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew4(PyObject *__ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("PyArray_MultiIterNew4", 0); - /* 
"../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":780 - * + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":781 + * * cdef inline object PyArray_MultiIterNew4(a, b, c, d): * return PyArray_MultiIterNew(4, a, b, c, d) # <<<<<<<<<<<<<< - * + * * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): */ __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyArray_MultiIterNew(4, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c), ((void *)__pyx_v_d)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 780; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyArray_MultiIterNew(4, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c), ((void *)__pyx_v_d)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 781; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __pyx_r = __pyx_t_1; __pyx_t_1 = 0; goto __pyx_L0; - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":779 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":780 * return PyArray_MultiIterNew(3, a, b, c) - * + * * cdef inline object PyArray_MultiIterNew4(a, b, c, d): # <<<<<<<<<<<<<< * return PyArray_MultiIterNew(4, a, b, c, d) - * + * */ /* function exit code */ @@ -9800,12 +9203,12 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew4(PyObject *__ return __pyx_r; } -/* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":782 +/* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":783 * return PyArray_MultiIterNew(4, a, b, c, d) - * + * * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): # 
<<<<<<<<<<<<<< * return PyArray_MultiIterNew(5, a, b, c, d, e) - * + * */ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew5(PyObject *__pyx_v_a, PyObject *__pyx_v_b, PyObject *__pyx_v_c, PyObject *__pyx_v_d, PyObject *__pyx_v_e) { @@ -9817,26 +9220,26 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew5(PyObject *__ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("PyArray_MultiIterNew5", 0); - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":783 - * + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":784 + * * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): * return PyArray_MultiIterNew(5, a, b, c, d, e) # <<<<<<<<<<<<<< - * + * * cdef inline char* _util_dtypestring(dtype descr, char* f, char* end, int* offset) except NULL: */ __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyArray_MultiIterNew(5, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c), ((void *)__pyx_v_d), ((void *)__pyx_v_e)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 783; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyArray_MultiIterNew(5, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c), ((void *)__pyx_v_d), ((void *)__pyx_v_e)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 784; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __pyx_r = __pyx_t_1; __pyx_t_1 = 0; goto __pyx_L0; - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":782 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":783 * return PyArray_MultiIterNew(4, a, b, c, d) - * + * * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): # <<<<<<<<<<<<<< * return PyArray_MultiIterNew(5, a, b, c, d, e) 
- * + * */ /* function exit code */ @@ -9850,9 +9253,9 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew5(PyObject *__ return __pyx_r; } -/* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":785 +/* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":786 * return PyArray_MultiIterNew(5, a, b, c, d, e) - * + * * cdef inline char* _util_dtypestring(dtype descr, char* f, char* end, int* offset) except NULL: # <<<<<<<<<<<<<< * # Recursive utility function used in __getbuffer__ to get format * # string. The new location in the format string is returned. @@ -9882,69 +9285,69 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx int __pyx_clineno = 0; __Pyx_RefNannySetupContext("_util_dtypestring", 0); - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":790 - * - * cdef dtype child + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":793 + * cdef int delta_offset + * cdef tuple i * cdef int endian_detector = 1 # <<<<<<<<<<<<<< * cdef bint little_endian = ((&endian_detector)[0] != 0) * cdef tuple fields */ __pyx_v_endian_detector = 1; - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":791 - * cdef dtype child + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":794 + * cdef tuple i * cdef int endian_detector = 1 * cdef bint little_endian = ((&endian_detector)[0] != 0) # <<<<<<<<<<<<<< * cdef tuple fields - * + * */ __pyx_v_little_endian = ((((char *)(&__pyx_v_endian_detector))[0]) != 0); - /* 
"../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":794 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":797 * cdef tuple fields - * + * * for childname in descr.names: # <<<<<<<<<<<<<< * fields = descr.fields[childname] * child, new_offset = fields */ if (unlikely(__pyx_v_descr->names == Py_None)) { PyErr_SetString(PyExc_TypeError, "'NoneType' object is not iterable"); - {__pyx_filename = __pyx_f[1]; __pyx_lineno = 794; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[1]; __pyx_lineno = 797; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_t_1 = __pyx_v_descr->names; __Pyx_INCREF(__pyx_t_1); __pyx_t_2 = 0; for (;;) { if (__pyx_t_2 >= PyTuple_GET_SIZE(__pyx_t_1)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_1, __pyx_t_2); __Pyx_INCREF(__pyx_t_3); __pyx_t_2++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 794; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_1, __pyx_t_2); __Pyx_INCREF(__pyx_t_3); __pyx_t_2++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 797; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_3 = PySequence_ITEM(__pyx_t_1, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 794; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PySequence_ITEM(__pyx_t_1, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 797; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); #endif __Pyx_XDECREF_SET(__pyx_v_childname, __pyx_t_3); __pyx_t_3 = 0; - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":795 - * + /* 
"../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":798 + * * for childname in descr.names: * fields = descr.fields[childname] # <<<<<<<<<<<<<< * child, new_offset = fields - * + * */ if (unlikely(__pyx_v_descr->fields == Py_None)) { PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable"); - {__pyx_filename = __pyx_f[1]; __pyx_lineno = 795; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[1]; __pyx_lineno = 798; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - __pyx_t_3 = __Pyx_PyDict_GetItem(__pyx_v_descr->fields, __pyx_v_childname); if (unlikely(__pyx_t_3 == NULL)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 795; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __pyx_t_3 = __Pyx_PyDict_GetItem(__pyx_v_descr->fields, __pyx_v_childname); if (unlikely(__pyx_t_3 == NULL)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 798; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __Pyx_GOTREF(__pyx_t_3); - if (!(likely(PyTuple_CheckExact(__pyx_t_3))||((__pyx_t_3) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "tuple", Py_TYPE(__pyx_t_3)->tp_name), 0))) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 795; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(PyTuple_CheckExact(__pyx_t_3))||((__pyx_t_3) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "tuple", Py_TYPE(__pyx_t_3)->tp_name), 0))) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 798; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_XDECREF_SET(__pyx_v_fields, ((PyObject*)__pyx_t_3)); __pyx_t_3 = 0; - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":796 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":799 * for childname in descr.names: * fields = descr.fields[childname] * child, new_offset = fields 
# <<<<<<<<<<<<<< - * + * * if (end - f) - (new_offset - offset[0]) < 15: */ if (likely(__pyx_v_fields != Py_None)) { @@ -9957,70 +9360,62 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx if (unlikely(size != 2)) { if (size > 2) __Pyx_RaiseTooManyValuesError(2); else if (size >= 0) __Pyx_RaiseNeedMoreValuesError(size); - {__pyx_filename = __pyx_f[1]; __pyx_lineno = 796; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[1]; __pyx_lineno = 799; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_3 = PyTuple_GET_ITEM(sequence, 0); - __pyx_t_4 = PyTuple_GET_ITEM(sequence, 1); + __pyx_t_3 = PyTuple_GET_ITEM(sequence, 0); + __pyx_t_4 = PyTuple_GET_ITEM(sequence, 1); __Pyx_INCREF(__pyx_t_3); __Pyx_INCREF(__pyx_t_4); #else - __pyx_t_3 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 796; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 799; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 796; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 799; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); #endif } else { - __Pyx_RaiseNoneNotIterableError(); {__pyx_filename = __pyx_f[1]; __pyx_lineno = 796; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_RaiseNoneNotIterableError(); {__pyx_filename = __pyx_f[1]; __pyx_lineno = 799; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_dtype))))) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 796; 
__pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_dtype))))) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 799; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_XDECREF_SET(__pyx_v_child, ((PyArray_Descr *)__pyx_t_3)); __pyx_t_3 = 0; __Pyx_XDECREF_SET(__pyx_v_new_offset, __pyx_t_4); __pyx_t_4 = 0; - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":798 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":801 * child, new_offset = fields - * + * * if (end - f) - (new_offset - offset[0]) < 15: # <<<<<<<<<<<<<< * raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd") - * + * */ - __pyx_t_4 = __Pyx_PyInt_From_int((__pyx_v_offset[0])); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 798; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyInt_From_int((__pyx_v_offset[0])); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 801; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_3 = PyNumber_Subtract(__pyx_v_new_offset, __pyx_t_4); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 798; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyNumber_Subtract(__pyx_v_new_offset, __pyx_t_4); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 801; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_5 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_5 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 798; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_5 == (int)-1) && PyErr_Occurred())) {__pyx_filename 
= __pyx_f[1]; __pyx_lineno = 801; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_t_6 = ((((__pyx_v_end - __pyx_v_f) - ((int)__pyx_t_5)) < 15) != 0); if (__pyx_t_6) { - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":799 - * + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":802 + * * if (end - f) - (new_offset - offset[0]) < 15: * raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd") # <<<<<<<<<<<<<< - * + * * if ((child.byteorder == c'>' and little_endian) or */ - __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_RuntimeError, __pyx_tuple__10, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 799; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_RuntimeError, __pyx_tuple__10, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 802; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_Raise(__pyx_t_3, 0, 0, 0); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - {__pyx_filename = __pyx_f[1]; __pyx_lineno = 799; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":798 - * child, new_offset = fields - * - * if (end - f) - (new_offset - offset[0]) < 15: # <<<<<<<<<<<<<< - * raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd") - * - */ + {__pyx_filename = __pyx_f[1]; __pyx_lineno = 802; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":801 + /* 
"../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":804 * raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd") - * + * * if ((child.byteorder == c'>' and little_endian) or # <<<<<<<<<<<<<< * (child.byteorder == c'<' and not little_endian)): * raise ValueError(u"Non-native byte order not supported") @@ -10038,8 +9433,8 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx } __pyx_L8_next_or:; - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":802 - * + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":805 + * * if ((child.byteorder == c'>' and little_endian) or * (child.byteorder == c'<' and not little_endian)): # <<<<<<<<<<<<<< * raise ValueError(u"Non-native byte order not supported") @@ -10054,96 +9449,80 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx __pyx_t_7 = ((!(__pyx_v_little_endian != 0)) != 0); __pyx_t_6 = __pyx_t_7; __pyx_L7_bool_binop_done:; - - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":801 - * raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd") - * - * if ((child.byteorder == c'>' and little_endian) or # <<<<<<<<<<<<<< - * (child.byteorder == c'<' and not little_endian)): - * raise ValueError(u"Non-native byte order not supported") - */ if (__pyx_t_6) { - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":803 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":806 * if ((child.byteorder == c'>' and little_endian) or * (child.byteorder == c'<' and not little_endian)): * raise 
ValueError(u"Non-native byte order not supported") # <<<<<<<<<<<<<< * # One could encode it in the format string and have Cython * # complain instead, BUT: < and > in format strings also imply */ - __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__11, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 803; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__11, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 806; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_Raise(__pyx_t_3, 0, 0, 0); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - {__pyx_filename = __pyx_f[1]; __pyx_lineno = 803; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":801 - * raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd") - * - * if ((child.byteorder == c'>' and little_endian) or # <<<<<<<<<<<<<< - * (child.byteorder == c'<' and not little_endian)): - * raise ValueError(u"Non-native byte order not supported") - */ + {__pyx_filename = __pyx_f[1]; __pyx_lineno = 806; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":813 - * + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":816 + * * # Output padding bytes * while offset[0] < new_offset: # <<<<<<<<<<<<<< * f[0] = 120 # "x"; pad byte * f += 1 */ while (1) { - __pyx_t_3 = __Pyx_PyInt_From_int((__pyx_v_offset[0])); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 813; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __Pyx_PyInt_From_int((__pyx_v_offset[0])); if (unlikely(!__pyx_t_3)) 
{__pyx_filename = __pyx_f[1]; __pyx_lineno = 816; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = PyObject_RichCompare(__pyx_t_3, __pyx_v_new_offset, Py_LT); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 813; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyObject_RichCompare(__pyx_t_3, __pyx_v_new_offset, Py_LT); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 816; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 813; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 816; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; if (!__pyx_t_6) break; - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":814 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":817 * # Output padding bytes * while offset[0] < new_offset: * f[0] = 120 # "x"; pad byte # <<<<<<<<<<<<<< * f += 1 * offset[0] += 1 */ - (__pyx_v_f[0]) = 0x78; + (__pyx_v_f[0]) = 120; - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":815 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":818 * while offset[0] < new_offset: * f[0] = 120 # "x"; pad byte * f += 1 # <<<<<<<<<<<<<< * offset[0] += 1 - * + * */ __pyx_v_f = (__pyx_v_f + 1); - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":816 + 
/* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":819 * f[0] = 120 # "x"; pad byte * f += 1 * offset[0] += 1 # <<<<<<<<<<<<<< - * + * * offset[0] += child.itemsize */ __pyx_t_8 = 0; (__pyx_v_offset[__pyx_t_8]) = ((__pyx_v_offset[__pyx_t_8]) + 1); } - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":818 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":821 * offset[0] += 1 - * + * * offset[0] += child.itemsize # <<<<<<<<<<<<<< - * + * * if not PyDataType_HASFIELDS(child): */ __pyx_t_8 = 0; (__pyx_v_offset[__pyx_t_8]) = ((__pyx_v_offset[__pyx_t_8]) + __pyx_v_child->elsize); - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":820 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":823 * offset[0] += child.itemsize - * + * * if not PyDataType_HASFIELDS(child): # <<<<<<<<<<<<<< * t = child.type_num * if end - f < 5: @@ -10151,387 +9530,379 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx __pyx_t_6 = ((!(PyDataType_HASFIELDS(__pyx_v_child) != 0)) != 0); if (__pyx_t_6) { - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":821 - * + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":824 + * * if not PyDataType_HASFIELDS(child): * t = child.type_num # <<<<<<<<<<<<<< * if end - f < 5: * raise RuntimeError(u"Format string allocated too short.") */ - __pyx_t_4 = __Pyx_PyInt_From_int(__pyx_v_child->type_num); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 821; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = 
__Pyx_PyInt_From_int(__pyx_v_child->type_num); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 824; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_XDECREF_SET(__pyx_v_t, __pyx_t_4); __pyx_t_4 = 0; - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":822 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":825 * if not PyDataType_HASFIELDS(child): * t = child.type_num * if end - f < 5: # <<<<<<<<<<<<<< * raise RuntimeError(u"Format string allocated too short.") - * + * */ __pyx_t_6 = (((__pyx_v_end - __pyx_v_f) < 5) != 0); if (__pyx_t_6) { - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":823 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":826 * t = child.type_num * if end - f < 5: * raise RuntimeError(u"Format string allocated too short.") # <<<<<<<<<<<<<< - * + * * # Until ticket #99 is fixed, use integers to avoid warnings */ - __pyx_t_4 = __Pyx_PyObject_Call(__pyx_builtin_RuntimeError, __pyx_tuple__12, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 823; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyObject_Call(__pyx_builtin_RuntimeError, __pyx_tuple__12, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 826; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_Raise(__pyx_t_4, 0, 0, 0); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - {__pyx_filename = __pyx_f[1]; __pyx_lineno = 823; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":822 - * if not PyDataType_HASFIELDS(child): - * t = 
child.type_num - * if end - f < 5: # <<<<<<<<<<<<<< - * raise RuntimeError(u"Format string allocated too short.") - * - */ + {__pyx_filename = __pyx_f[1]; __pyx_lineno = 826; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":826 - * + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":829 + * * # Until ticket #99 is fixed, use integers to avoid warnings * if t == NPY_BYTE: f[0] = 98 #"b" # <<<<<<<<<<<<<< * elif t == NPY_UBYTE: f[0] = 66 #"B" * elif t == NPY_SHORT: f[0] = 104 #"h" */ - __pyx_t_4 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_BYTE); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 826; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyInt_FromLong(NPY_BYTE); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 829; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_3 = PyObject_RichCompare(__pyx_v_t, __pyx_t_4, Py_EQ); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 826; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_RichCompare(__pyx_v_t, __pyx_t_4, Py_EQ); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 829; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 826; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 829; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; if (__pyx_t_6) { (__pyx_v_f[0]) = 98; goto __pyx_L15; } - /* 
"../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":827 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":830 * # Until ticket #99 is fixed, use integers to avoid warnings * if t == NPY_BYTE: f[0] = 98 #"b" * elif t == NPY_UBYTE: f[0] = 66 #"B" # <<<<<<<<<<<<<< * elif t == NPY_SHORT: f[0] = 104 #"h" * elif t == NPY_USHORT: f[0] = 72 #"H" */ - __pyx_t_3 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_UBYTE); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 827; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyInt_FromLong(NPY_UBYTE); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 830; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = PyObject_RichCompare(__pyx_v_t, __pyx_t_3, Py_EQ); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 827; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyObject_RichCompare(__pyx_v_t, __pyx_t_3, Py_EQ); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 830; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 827; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 830; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; if (__pyx_t_6) { (__pyx_v_f[0]) = 66; goto __pyx_L15; } - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":828 + /* 
"../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":831 * if t == NPY_BYTE: f[0] = 98 #"b" * elif t == NPY_UBYTE: f[0] = 66 #"B" * elif t == NPY_SHORT: f[0] = 104 #"h" # <<<<<<<<<<<<<< * elif t == NPY_USHORT: f[0] = 72 #"H" * elif t == NPY_INT: f[0] = 105 #"i" */ - __pyx_t_4 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_SHORT); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 828; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyInt_FromLong(NPY_SHORT); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 831; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_3 = PyObject_RichCompare(__pyx_v_t, __pyx_t_4, Py_EQ); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 828; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_RichCompare(__pyx_v_t, __pyx_t_4, Py_EQ); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 831; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 828; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 831; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; if (__pyx_t_6) { - (__pyx_v_f[0]) = 0x68; + (__pyx_v_f[0]) = 104; goto __pyx_L15; } - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":829 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":832 * elif t == NPY_UBYTE: f[0] = 66 #"B" * elif t == NPY_SHORT: f[0] = 104 #"h" * elif t == 
NPY_USHORT: f[0] = 72 #"H" # <<<<<<<<<<<<<< * elif t == NPY_INT: f[0] = 105 #"i" * elif t == NPY_UINT: f[0] = 73 #"I" */ - __pyx_t_3 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_USHORT); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 829; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyInt_FromLong(NPY_USHORT); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 832; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = PyObject_RichCompare(__pyx_v_t, __pyx_t_3, Py_EQ); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 829; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyObject_RichCompare(__pyx_v_t, __pyx_t_3, Py_EQ); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 832; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 829; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 832; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; if (__pyx_t_6) { (__pyx_v_f[0]) = 72; goto __pyx_L15; } - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":830 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":833 * elif t == NPY_SHORT: f[0] = 104 #"h" * elif t == NPY_USHORT: f[0] = 72 #"H" * elif t == NPY_INT: f[0] = 105 #"i" # <<<<<<<<<<<<<< * elif t == NPY_UINT: f[0] = 73 #"I" * elif t == NPY_LONG: f[0] = 108 #"l" */ - __pyx_t_4 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_INT); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; 
__pyx_lineno = 830; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyInt_FromLong(NPY_INT); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 833; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_3 = PyObject_RichCompare(__pyx_v_t, __pyx_t_4, Py_EQ); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 830; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_RichCompare(__pyx_v_t, __pyx_t_4, Py_EQ); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 833; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 830; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 833; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; if (__pyx_t_6) { - (__pyx_v_f[0]) = 0x69; + (__pyx_v_f[0]) = 105; goto __pyx_L15; } - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":831 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":834 * elif t == NPY_USHORT: f[0] = 72 #"H" * elif t == NPY_INT: f[0] = 105 #"i" * elif t == NPY_UINT: f[0] = 73 #"I" # <<<<<<<<<<<<<< * elif t == NPY_LONG: f[0] = 108 #"l" * elif t == NPY_ULONG: f[0] = 76 #"L" */ - __pyx_t_3 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_UINT); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 831; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyInt_FromLong(NPY_UINT); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 834; __pyx_clineno = __LINE__; goto 
__pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = PyObject_RichCompare(__pyx_v_t, __pyx_t_3, Py_EQ); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 831; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyObject_RichCompare(__pyx_v_t, __pyx_t_3, Py_EQ); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 834; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 831; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 834; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; if (__pyx_t_6) { (__pyx_v_f[0]) = 73; goto __pyx_L15; } - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":832 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":835 * elif t == NPY_INT: f[0] = 105 #"i" * elif t == NPY_UINT: f[0] = 73 #"I" * elif t == NPY_LONG: f[0] = 108 #"l" # <<<<<<<<<<<<<< * elif t == NPY_ULONG: f[0] = 76 #"L" * elif t == NPY_LONGLONG: f[0] = 113 #"q" */ - __pyx_t_4 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_LONG); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 832; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyInt_FromLong(NPY_LONG); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 835; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_3 = PyObject_RichCompare(__pyx_v_t, __pyx_t_4, Py_EQ); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 832; __pyx_clineno = __LINE__; goto 
__pyx_L1_error;} + __pyx_t_3 = PyObject_RichCompare(__pyx_v_t, __pyx_t_4, Py_EQ); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 835; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 832; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 835; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; if (__pyx_t_6) { - (__pyx_v_f[0]) = 0x6C; + (__pyx_v_f[0]) = 108; goto __pyx_L15; } - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":833 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":836 * elif t == NPY_UINT: f[0] = 73 #"I" * elif t == NPY_LONG: f[0] = 108 #"l" * elif t == NPY_ULONG: f[0] = 76 #"L" # <<<<<<<<<<<<<< * elif t == NPY_LONGLONG: f[0] = 113 #"q" * elif t == NPY_ULONGLONG: f[0] = 81 #"Q" */ - __pyx_t_3 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_ULONG); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 833; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyInt_FromLong(NPY_ULONG); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 836; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = PyObject_RichCompare(__pyx_v_t, __pyx_t_3, Py_EQ); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 833; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyObject_RichCompare(__pyx_v_t, __pyx_t_3, Py_EQ); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 836; __pyx_clineno = 
__LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 833; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 836; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; if (__pyx_t_6) { (__pyx_v_f[0]) = 76; goto __pyx_L15; } - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":834 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":837 * elif t == NPY_LONG: f[0] = 108 #"l" * elif t == NPY_ULONG: f[0] = 76 #"L" * elif t == NPY_LONGLONG: f[0] = 113 #"q" # <<<<<<<<<<<<<< * elif t == NPY_ULONGLONG: f[0] = 81 #"Q" * elif t == NPY_FLOAT: f[0] = 102 #"f" */ - __pyx_t_4 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_LONGLONG); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 834; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyInt_FromLong(NPY_LONGLONG); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 837; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_3 = PyObject_RichCompare(__pyx_v_t, __pyx_t_4, Py_EQ); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 834; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_RichCompare(__pyx_v_t, __pyx_t_4, Py_EQ); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 837; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 834; __pyx_clineno = __LINE__; 
goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 837; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; if (__pyx_t_6) { - (__pyx_v_f[0]) = 0x71; + (__pyx_v_f[0]) = 113; goto __pyx_L15; } - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":835 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":838 * elif t == NPY_ULONG: f[0] = 76 #"L" * elif t == NPY_LONGLONG: f[0] = 113 #"q" * elif t == NPY_ULONGLONG: f[0] = 81 #"Q" # <<<<<<<<<<<<<< * elif t == NPY_FLOAT: f[0] = 102 #"f" * elif t == NPY_DOUBLE: f[0] = 100 #"d" */ - __pyx_t_3 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_ULONGLONG); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 835; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyInt_FromLong(NPY_ULONGLONG); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 838; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = PyObject_RichCompare(__pyx_v_t, __pyx_t_3, Py_EQ); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 835; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyObject_RichCompare(__pyx_v_t, __pyx_t_3, Py_EQ); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 838; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 835; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 838; __pyx_clineno = __LINE__; goto __pyx_L1_error;} 
__Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; if (__pyx_t_6) { (__pyx_v_f[0]) = 81; goto __pyx_L15; } - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":836 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":839 * elif t == NPY_LONGLONG: f[0] = 113 #"q" * elif t == NPY_ULONGLONG: f[0] = 81 #"Q" * elif t == NPY_FLOAT: f[0] = 102 #"f" # <<<<<<<<<<<<<< * elif t == NPY_DOUBLE: f[0] = 100 #"d" * elif t == NPY_LONGDOUBLE: f[0] = 103 #"g" */ - __pyx_t_4 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_FLOAT); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 836; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyInt_FromLong(NPY_FLOAT); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 839; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_3 = PyObject_RichCompare(__pyx_v_t, __pyx_t_4, Py_EQ); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 836; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_RichCompare(__pyx_v_t, __pyx_t_4, Py_EQ); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 839; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 836; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 839; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; if (__pyx_t_6) { - (__pyx_v_f[0]) = 0x66; + (__pyx_v_f[0]) = 102; goto __pyx_L15; } - /* 
"../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":837 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":840 * elif t == NPY_ULONGLONG: f[0] = 81 #"Q" * elif t == NPY_FLOAT: f[0] = 102 #"f" * elif t == NPY_DOUBLE: f[0] = 100 #"d" # <<<<<<<<<<<<<< * elif t == NPY_LONGDOUBLE: f[0] = 103 #"g" * elif t == NPY_CFLOAT: f[0] = 90; f[1] = 102; f += 1 # Zf */ - __pyx_t_3 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_DOUBLE); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 837; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyInt_FromLong(NPY_DOUBLE); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 840; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = PyObject_RichCompare(__pyx_v_t, __pyx_t_3, Py_EQ); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 837; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyObject_RichCompare(__pyx_v_t, __pyx_t_3, Py_EQ); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 840; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 837; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 840; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; if (__pyx_t_6) { - (__pyx_v_f[0]) = 0x64; + (__pyx_v_f[0]) = 100; goto __pyx_L15; } - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":838 + /* 
"../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":841 * elif t == NPY_FLOAT: f[0] = 102 #"f" * elif t == NPY_DOUBLE: f[0] = 100 #"d" * elif t == NPY_LONGDOUBLE: f[0] = 103 #"g" # <<<<<<<<<<<<<< * elif t == NPY_CFLOAT: f[0] = 90; f[1] = 102; f += 1 # Zf * elif t == NPY_CDOUBLE: f[0] = 90; f[1] = 100; f += 1 # Zd */ - __pyx_t_4 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_LONGDOUBLE); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 838; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyInt_FromLong(NPY_LONGDOUBLE); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 841; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_3 = PyObject_RichCompare(__pyx_v_t, __pyx_t_4, Py_EQ); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 838; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_RichCompare(__pyx_v_t, __pyx_t_4, Py_EQ); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 841; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 838; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 841; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; if (__pyx_t_6) { - (__pyx_v_f[0]) = 0x67; + (__pyx_v_f[0]) = 103; goto __pyx_L15; } - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":839 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":842 * elif t == NPY_DOUBLE: f[0] = 100 
#"d" * elif t == NPY_LONGDOUBLE: f[0] = 103 #"g" * elif t == NPY_CFLOAT: f[0] = 90; f[1] = 102; f += 1 # Zf # <<<<<<<<<<<<<< * elif t == NPY_CDOUBLE: f[0] = 90; f[1] = 100; f += 1 # Zd * elif t == NPY_CLONGDOUBLE: f[0] = 90; f[1] = 103; f += 1 # Zg */ - __pyx_t_3 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_CFLOAT); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 839; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyInt_FromLong(NPY_CFLOAT); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 842; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = PyObject_RichCompare(__pyx_v_t, __pyx_t_3, Py_EQ); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 839; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyObject_RichCompare(__pyx_v_t, __pyx_t_3, Py_EQ); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 842; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 839; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 842; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; if (__pyx_t_6) { (__pyx_v_f[0]) = 90; - (__pyx_v_f[1]) = 0x66; + (__pyx_v_f[1]) = 102; __pyx_v_f = (__pyx_v_f + 1); goto __pyx_L15; } - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":840 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":843 * elif t == NPY_LONGDOUBLE: f[0] = 103 #"g" * elif t == NPY_CFLOAT: f[0] = 90; f[1] = 102; f += 1 # Zf * elif t == 
NPY_CDOUBLE: f[0] = 90; f[1] = 100; f += 1 # Zd # <<<<<<<<<<<<<< * elif t == NPY_CLONGDOUBLE: f[0] = 90; f[1] = 103; f += 1 # Zg * elif t == NPY_OBJECT: f[0] = 79 #"O" */ - __pyx_t_4 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_CDOUBLE); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 840; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyInt_FromLong(NPY_CDOUBLE); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 843; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_3 = PyObject_RichCompare(__pyx_v_t, __pyx_t_4, Py_EQ); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 840; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_RichCompare(__pyx_v_t, __pyx_t_4, Py_EQ); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 843; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 840; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 843; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; if (__pyx_t_6) { (__pyx_v_f[0]) = 90; - (__pyx_v_f[1]) = 0x64; + (__pyx_v_f[1]) = 100; __pyx_v_f = (__pyx_v_f + 1); goto __pyx_L15; } - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":841 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":844 * elif t == NPY_CFLOAT: f[0] = 90; f[1] = 102; f += 1 # Zf * elif t == NPY_CDOUBLE: f[0] = 90; f[1] = 100; f += 1 # Zd * elif t == NPY_CLONGDOUBLE: f[0] = 90; f[1] = 103; f += 1 # Zg # <<<<<<<<<<<<<< 
* elif t == NPY_OBJECT: f[0] = 79 #"O" * else: */ - __pyx_t_3 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_CLONGDOUBLE); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 841; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyInt_FromLong(NPY_CLONGDOUBLE); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 844; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = PyObject_RichCompare(__pyx_v_t, __pyx_t_3, Py_EQ); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 841; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyObject_RichCompare(__pyx_v_t, __pyx_t_3, Py_EQ); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 844; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 841; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 844; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; if (__pyx_t_6) { (__pyx_v_f[0]) = 90; - (__pyx_v_f[1]) = 0x67; + (__pyx_v_f[1]) = 103; __pyx_v_f = (__pyx_v_f + 1); goto __pyx_L15; } - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":842 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":845 * elif t == NPY_CDOUBLE: f[0] = 90; f[1] = 100; f += 1 # Zd * elif t == NPY_CLONGDOUBLE: f[0] = 90; f[1] = 103; f += 1 # Zg * elif t == NPY_OBJECT: f[0] = 79 #"O" # <<<<<<<<<<<<<< * else: * raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t) */ - __pyx_t_4 = 
__Pyx_PyInt_From_enum__NPY_TYPES(NPY_OBJECT); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 842; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyInt_FromLong(NPY_OBJECT); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 845; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_3 = PyObject_RichCompare(__pyx_v_t, __pyx_t_4, Py_EQ); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 842; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_RichCompare(__pyx_v_t, __pyx_t_4, Py_EQ); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 845; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 842; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 845; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; if (__pyx_t_6) { (__pyx_v_f[0]) = 79; goto __pyx_L15; } + /*else*/ { - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":844 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":847 * elif t == NPY_OBJECT: f[0] = 79 #"O" * else: * raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t) # <<<<<<<<<<<<<< * f += 1 * else: */ - /*else*/ { - __pyx_t_3 = PyUnicode_Format(__pyx_kp_u_unknown_dtype_code_in_numpy_pxd, __pyx_v_t); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 844; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyUnicode_Format(__pyx_kp_u_unknown_dtype_code_in_numpy_pxd, 
__pyx_v_t); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 847; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 844; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 847; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_GIVEREF(__pyx_t_3); PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_t_4, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 844; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_t_4, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 847; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_Raise(__pyx_t_3, 0, 0, 0); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - {__pyx_filename = __pyx_f[1]; __pyx_lineno = 844; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[1]; __pyx_lineno = 847; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_L15:; - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":845 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":848 * else: * raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t) * f += 1 # <<<<<<<<<<<<<< @@ -10539,33 +9910,25 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx * # Cython ignores struct boundary information ("T{...}"), */ __pyx_v_f = (__pyx_v_f + 1); - - /* 
"../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":820 - * offset[0] += child.itemsize - * - * if not PyDataType_HASFIELDS(child): # <<<<<<<<<<<<<< - * t = child.type_num - * if end - f < 5: - */ goto __pyx_L13; } + /*else*/ { - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":849 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":852 * # Cython ignores struct boundary information ("T{...}"), * # so don't output it * f = _util_dtypestring(child, f, end, offset) # <<<<<<<<<<<<<< * return f - * + * */ - /*else*/ { - __pyx_t_9 = __pyx_f_5numpy__util_dtypestring(__pyx_v_child, __pyx_v_f, __pyx_v_end, __pyx_v_offset); if (unlikely(__pyx_t_9 == NULL)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 849; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = __pyx_f_5numpy__util_dtypestring(__pyx_v_child, __pyx_v_f, __pyx_v_end, __pyx_v_offset); if (unlikely(__pyx_t_9 == NULL)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 852; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_f = __pyx_t_9; } __pyx_L13:; - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":794 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":797 * cdef tuple fields - * + * * for childname in descr.names: # <<<<<<<<<<<<<< * fields = descr.fields[childname] * child, new_offset = fields @@ -10573,19 +9936,19 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":850 + /* 
"../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":853 * # so don't output it * f = _util_dtypestring(child, f, end, offset) * return f # <<<<<<<<<<<<<< - * - * + * + * */ __pyx_r = __pyx_v_f; goto __pyx_L0; - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":785 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":786 * return PyArray_MultiIterNew(5, a, b, c, d, e) - * + * * cdef inline char* _util_dtypestring(dtype descr, char* f, char* end, int* offset) except NULL: # <<<<<<<<<<<<<< * # Recursive utility function used in __getbuffer__ to get format * # string. The new location in the format string is returned. @@ -10608,9 +9971,9 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx return __pyx_r; } -/* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":966 - * - * +/* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":969 + * + * * cdef inline void set_array_base(ndarray arr, object base): # <<<<<<<<<<<<<< * cdef PyObject* baseptr * if base is None: @@ -10623,7 +9986,7 @@ static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_a int __pyx_t_2; __Pyx_RefNannySetupContext("set_array_base", 0); - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":968 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":971 * cdef inline void set_array_base(ndarray arr, object base): * cdef PyObject* baseptr * if base is None: # <<<<<<<<<<<<<< @@ -10634,7 +9997,7 @@ static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_a 
__pyx_t_2 = (__pyx_t_1 != 0); if (__pyx_t_2) { - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":969 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":972 * cdef PyObject* baseptr * if base is None: * baseptr = NULL # <<<<<<<<<<<<<< @@ -10642,28 +10005,20 @@ static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_a * Py_INCREF(base) # important to do this before decref below! */ __pyx_v_baseptr = NULL; - - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":968 - * cdef inline void set_array_base(ndarray arr, object base): - * cdef PyObject* baseptr - * if base is None: # <<<<<<<<<<<<<< - * baseptr = NULL - * else: - */ goto __pyx_L3; } + /*else*/ { - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":971 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":974 * baseptr = NULL * else: * Py_INCREF(base) # important to do this before decref below! # <<<<<<<<<<<<<< * baseptr = base * Py_XDECREF(arr.base) */ - /*else*/ { Py_INCREF(__pyx_v_base); - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":972 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":975 * else: * Py_INCREF(base) # important to do this before decref below! 
* baseptr = base # <<<<<<<<<<<<<< @@ -10674,27 +10029,27 @@ static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_a } __pyx_L3:; - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":973 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":976 * Py_INCREF(base) # important to do this before decref below! * baseptr = base * Py_XDECREF(arr.base) # <<<<<<<<<<<<<< * arr.base = baseptr - * + * */ Py_XDECREF(__pyx_v_arr->base); - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":974 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":977 * baseptr = base * Py_XDECREF(arr.base) * arr.base = baseptr # <<<<<<<<<<<<<< - * + * * cdef inline object get_array_base(ndarray arr): */ __pyx_v_arr->base = __pyx_v_baseptr; - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":966 - * - * + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":969 + * + * * cdef inline void set_array_base(ndarray arr, object base): # <<<<<<<<<<<<<< * cdef PyObject* baseptr * if base is None: @@ -10704,9 +10059,9 @@ static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_a __Pyx_RefNannyFinishContext(); } -/* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":976 +/* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":979 * arr.base = baseptr - * + * * cdef inline object get_array_base(ndarray arr): # <<<<<<<<<<<<<< * if arr.base is NULL: * return None @@ -10718,8 +10073,8 @@ static CYTHON_INLINE 
PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__py int __pyx_t_1; __Pyx_RefNannySetupContext("get_array_base", 0); - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":977 - * + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":980 + * * cdef inline object get_array_base(ndarray arr): * if arr.base is NULL: # <<<<<<<<<<<<<< * return None @@ -10728,7 +10083,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__py __pyx_t_1 = ((__pyx_v_arr->base == NULL) != 0); if (__pyx_t_1) { - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":978 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":981 * cdef inline object get_array_base(ndarray arr): * if arr.base is NULL: * return None # <<<<<<<<<<<<<< @@ -10739,31 +10094,23 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__py __Pyx_INCREF(Py_None); __pyx_r = Py_None; goto __pyx_L0; - - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":977 - * - * cdef inline object get_array_base(ndarray arr): - * if arr.base is NULL: # <<<<<<<<<<<<<< - * return None - * else: - */ } + /*else*/ { - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":980 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":983 * return None * else: * return arr.base # <<<<<<<<<<<<<< */ - /*else*/ { __Pyx_XDECREF(__pyx_r); __Pyx_INCREF(((PyObject *)__pyx_v_arr->base)); __pyx_r = ((PyObject *)__pyx_v_arr->base); goto __pyx_L0; } - /* 
"../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":976 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":979 * arr.base = baseptr - * + * * cdef inline object get_array_base(ndarray arr): # <<<<<<<<<<<<<< * if arr.base is NULL: * return None @@ -10804,10 +10151,13 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_kp_u_Format_string_allocated_too_shor, __pyx_k_Format_string_allocated_too_shor, sizeof(__pyx_k_Format_string_allocated_too_shor), 0, 1, 0, 0}, {&__pyx_kp_u_Format_string_allocated_too_shor_2, __pyx_k_Format_string_allocated_too_shor_2, sizeof(__pyx_k_Format_string_allocated_too_shor_2), 0, 1, 0, 0}, {&__pyx_n_s_ImportError, __pyx_k_ImportError, sizeof(__pyx_k_ImportError), 0, 0, 1, 1}, + {&__pyx_n_s_MAX_BATCH_SENTENCES, __pyx_k_MAX_BATCH_SENTENCES, sizeof(__pyx_k_MAX_BATCH_SENTENCES), 0, 0, 1, 1}, + {&__pyx_n_s_MAX_WORDS_IN_BATCH, __pyx_k_MAX_WORDS_IN_BATCH, sizeof(__pyx_k_MAX_WORDS_IN_BATCH), 0, 0, 1, 1}, {&__pyx_kp_u_Non_native_byte_order_not_suppor, __pyx_k_Non_native_byte_order_not_suppor, sizeof(__pyx_k_Non_native_byte_order_not_suppor), 0, 1, 0, 0}, {&__pyx_n_s_REAL, __pyx_k_REAL, sizeof(__pyx_k_REAL), 0, 0, 1, 1}, {&__pyx_n_s_RuntimeError, __pyx_k_RuntimeError, sizeof(__pyx_k_RuntimeError), 0, 0, 1, 1}, {&__pyx_n_s_ValueError, __pyx_k_ValueError, sizeof(__pyx_k_ValueError), 0, 0, 1, 1}, + {&__pyx_kp_s_Volumes_work_workspace_gensim_t, __pyx_k_Volumes_work_workspace_gensim_t, sizeof(__pyx_k_Volumes_work_workspace_gensim_t), 0, 0, 1, 0}, {&__pyx_n_s__13, __pyx_k__13, sizeof(__pyx_k__13), 0, 0, 1, 1}, {&__pyx_n_s_alpha, __pyx_k_alpha, sizeof(__pyx_k_alpha), 0, 0, 1, 1}, {&__pyx_n_s_alpha_2, __pyx_k_alpha_2, sizeof(__pyx_k_alpha_2), 0, 0, 1, 1}, @@ -10825,7 +10175,6 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_fblas, __pyx_k_fblas, sizeof(__pyx_k_fblas), 0, 0, 1, 1}, 
{&__pyx_n_s_float32, __pyx_k_float32, sizeof(__pyx_k_float32), 0, 0, 1, 1}, {&__pyx_n_s_gensim_models_word2vec_inner, __pyx_k_gensim_models_word2vec_inner, sizeof(__pyx_k_gensim_models_word2vec_inner), 0, 0, 1, 1}, - {&__pyx_kp_s_home_olavur_RaRe_w2v_batch_sent, __pyx_k_home_olavur_RaRe_w2v_batch_sent, sizeof(__pyx_k_home_olavur_RaRe_w2v_batch_sent), 0, 0, 1, 0}, {&__pyx_n_s_hs, __pyx_k_hs, sizeof(__pyx_k_hs), 0, 0, 1, 1}, {&__pyx_n_s_i, __pyx_k_i, sizeof(__pyx_k_i), 0, 0, 1, 1}, {&__pyx_n_s_idx, __pyx_k_idx, sizeof(__pyx_k_idx), 0, 0, 1, 1}, @@ -10905,7 +10254,7 @@ static int __Pyx_InitCachedBuiltins(void) { __pyx_builtin_range = __Pyx_GetBuiltinName(__pyx_n_s_range); if (!__pyx_builtin_range) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 81; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_builtin_enumerate = __Pyx_GetBuiltinName(__pyx_n_s_enumerate); if (!__pyx_builtin_enumerate) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 323; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_builtin_ValueError = __Pyx_GetBuiltinName(__pyx_n_s_ValueError); if (!__pyx_builtin_ValueError) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 218; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_builtin_RuntimeError = __Pyx_GetBuiltinName(__pyx_n_s_RuntimeError); if (!__pyx_builtin_RuntimeError) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 799; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_builtin_RuntimeError = __Pyx_GetBuiltinName(__pyx_n_s_RuntimeError); if (!__pyx_builtin_RuntimeError) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 802; __pyx_clineno = __LINE__; goto __pyx_L1_error;} return 0; __pyx_L1_error:; return -1; @@ -10919,7 +10268,7 @@ static int __Pyx_InitCachedConstants(void) { * cum_table_len = len(model.cum_table) * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # <<<<<<<<<<<<<< - * + * * # convert Python structures to primitive types, so we can release the GIL */ __pyx_tuple_ = 
PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple_)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 298; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -10933,7 +10282,7 @@ static int __Pyx_InitCachedConstants(void) { * cum_table_len = len(model.cum_table) * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # <<<<<<<<<<<<<< - * + * * # convert Python structures to primitive types, so we can release the GIL */ __pyx_tuple__3 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 389; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -10947,7 +10296,7 @@ static int __Pyx_InitCachedConstants(void) { * cum_table_len = len(model.cum_table) * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # <<<<<<<<<<<<<< - * + * * # convert Python structures to primitive types, so we can release the GIL */ __pyx_tuple__5 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 489; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -10957,75 +10306,75 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__6); __Pyx_GIVEREF(__pyx_tuple__6); - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":218 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":218 * if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS) * and not PyArray_CHKFLAGS(self, NPY_C_CONTIGUOUS)): * raise ValueError(u"ndarray is not C contiguous") # <<<<<<<<<<<<<< - * + * * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) */ __pyx_tuple__7 = PyTuple_Pack(1, __pyx_kp_u_ndarray_is_not_C_contiguous); if (unlikely(!__pyx_tuple__7)) 
{__pyx_filename = __pyx_f[1]; __pyx_lineno = 218; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__7); __Pyx_GIVEREF(__pyx_tuple__7); - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":222 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":222 * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) * and not PyArray_CHKFLAGS(self, NPY_F_CONTIGUOUS)): * raise ValueError(u"ndarray is not Fortran contiguous") # <<<<<<<<<<<<<< - * + * * info.buf = PyArray_DATA(self) */ __pyx_tuple__8 = PyTuple_Pack(1, __pyx_kp_u_ndarray_is_not_Fortran_contiguou); if (unlikely(!__pyx_tuple__8)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 222; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__8); __Pyx_GIVEREF(__pyx_tuple__8); - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":259 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":260 * if ((descr.byteorder == c'>' and little_endian) or * (descr.byteorder == c'<' and not little_endian)): * raise ValueError(u"Non-native byte order not supported") # <<<<<<<<<<<<<< * if t == NPY_BYTE: f = "b" * elif t == NPY_UBYTE: f = "B" */ - __pyx_tuple__9 = PyTuple_Pack(1, __pyx_kp_u_Non_native_byte_order_not_suppor); if (unlikely(!__pyx_tuple__9)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 259; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__9 = PyTuple_Pack(1, __pyx_kp_u_Non_native_byte_order_not_suppor); if (unlikely(!__pyx_tuple__9)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 260; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__9); __Pyx_GIVEREF(__pyx_tuple__9); - /* 
"../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":799 - * + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":802 + * * if (end - f) - (new_offset - offset[0]) < 15: * raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd") # <<<<<<<<<<<<<< - * + * * if ((child.byteorder == c'>' and little_endian) or */ - __pyx_tuple__10 = PyTuple_Pack(1, __pyx_kp_u_Format_string_allocated_too_shor); if (unlikely(!__pyx_tuple__10)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 799; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__10 = PyTuple_Pack(1, __pyx_kp_u_Format_string_allocated_too_shor); if (unlikely(!__pyx_tuple__10)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 802; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__10); __Pyx_GIVEREF(__pyx_tuple__10); - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":803 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":806 * if ((child.byteorder == c'>' and little_endian) or * (child.byteorder == c'<' and not little_endian)): * raise ValueError(u"Non-native byte order not supported") # <<<<<<<<<<<<<< * # One could encode it in the format string and have Cython * # complain instead, BUT: < and > in format strings also imply */ - __pyx_tuple__11 = PyTuple_Pack(1, __pyx_kp_u_Non_native_byte_order_not_suppor); if (unlikely(!__pyx_tuple__11)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 803; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__11 = PyTuple_Pack(1, __pyx_kp_u_Non_native_byte_order_not_suppor); if (unlikely(!__pyx_tuple__11)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 806; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__11); 
__Pyx_GIVEREF(__pyx_tuple__11); - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":823 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":826 * t = child.type_num * if end - f < 5: * raise RuntimeError(u"Format string allocated too short.") # <<<<<<<<<<<<<< - * + * * # Until ticket #99 is fixed, use integers to avoid warnings */ - __pyx_tuple__12 = PyTuple_Pack(1, __pyx_kp_u_Format_string_allocated_too_shor_2); if (unlikely(!__pyx_tuple__12)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 823; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__12 = PyTuple_Pack(1, __pyx_kp_u_Format_string_allocated_too_shor_2); if (unlikely(!__pyx_tuple__12)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 826; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__12); __Pyx_GIVEREF(__pyx_tuple__12); /* "gensim/models/word2vec_inner.pyx":258 - * - * + * + * * def train_sentence_sg(model, sentence, alpha, _work): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative @@ -11033,11 +10382,11 @@ static int __Pyx_InitCachedConstants(void) { __pyx_tuple__14 = PyTuple_Pack(32, __pyx_n_s_model, __pyx_n_s_sentence, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_sample, __pyx_n_s_syn0, __pyx_n_s_word_locks, __pyx_n_s_work_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_len, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_cum_table, __pyx_n_s_cum_table_len, __pyx_n_s_next_random, __pyx_n_s_vlookup, __pyx_n_s_token, __pyx_n_s_word, __pyx_n_s_item); if (unlikely(!__pyx_tuple__14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 258; __pyx_clineno = __LINE__; goto __pyx_L1_error;} 
__Pyx_GOTREF(__pyx_tuple__14); __Pyx_GIVEREF(__pyx_tuple__14); - __pyx_codeobj__15 = (PyObject*)__Pyx_PyCode_New(4, 0, 32, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__14, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_olavur_RaRe_w2v_batch_sent, __pyx_n_s_train_sentence_sg, 258, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 258; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_codeobj__15 = (PyObject*)__Pyx_PyCode_New(4, 0, 32, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__14, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Volumes_work_workspace_gensim_t, __pyx_n_s_train_sentence_sg, 258, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 258; __pyx_clineno = __LINE__; goto __pyx_L1_error;} /* "gensim/models/word2vec_inner.pyx":346 - * - * + * + * * def train_batch_sg(model, sentences, alpha, sentence_indeces, _work): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative @@ -11045,11 +10394,11 @@ static int __Pyx_InitCachedConstants(void) { __pyx_tuple__16 = PyTuple_Pack(41, __pyx_n_s_model, __pyx_n_s_sentences, __pyx_n_s_alpha, __pyx_n_s_sentence_indeces, __pyx_n_s_work, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_sample, __pyx_n_s_syn0, __pyx_n_s_word_locks, __pyx_n_s_work_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_len, __pyx_n_s_sentence_indeces_c, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_m, __pyx_n_s_result, __pyx_n_s_num_sentences, __pyx_n_s_sent_idx, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_cum_table, __pyx_n_s_cum_table_len, __pyx_n_s_next_random, __pyx_n_s_vlookup, __pyx_n_s_idx, __pyx_n_s_idx1, __pyx_n_s_idx2, __pyx_n_s_sentence, __pyx_n_s_token, __pyx_n_s_word, __pyx_n_s_item); if (unlikely(!__pyx_tuple__16)) 
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 346; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__16); __Pyx_GIVEREF(__pyx_tuple__16); - __pyx_codeobj__17 = (PyObject*)__Pyx_PyCode_New(5, 0, 41, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__16, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_olavur_RaRe_w2v_batch_sent, __pyx_n_s_train_batch_sg, 346, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 346; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_codeobj__17 = (PyObject*)__Pyx_PyCode_New(5, 0, 41, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__16, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Volumes_work_workspace_gensim_t, __pyx_n_s_train_batch_sg, 346, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 346; __pyx_clineno = __LINE__; goto __pyx_L1_error;} /* "gensim/models/word2vec_inner.pyx":447 - * - * + * + * * def train_sentence_cbow(model, sentence, alpha, _work, _neu1): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative @@ -11057,35 +10406,35 @@ static int __Pyx_InitCachedConstants(void) { __pyx_tuple__18 = PyTuple_Pack(35, __pyx_n_s_model, __pyx_n_s_sentence, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_sample, __pyx_n_s_cbow_mean, __pyx_n_s_syn0, __pyx_n_s_word_locks, __pyx_n_s_work_2, __pyx_n_s_neu1_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_len, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_cum_table, __pyx_n_s_cum_table_len, __pyx_n_s_next_random, __pyx_n_s_vlookup, __pyx_n_s_token, __pyx_n_s_word, __pyx_n_s_item); if (unlikely(!__pyx_tuple__18)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 
447; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__18); __Pyx_GIVEREF(__pyx_tuple__18); - __pyx_codeobj__19 = (PyObject*)__Pyx_PyCode_New(5, 0, 35, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__18, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_olavur_RaRe_w2v_batch_sent, __pyx_n_s_train_sentence_cbow, 447, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__19)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 447; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_codeobj__19 = (PyObject*)__Pyx_PyCode_New(5, 0, 35, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__18, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Volumes_work_workspace_gensim_t, __pyx_n_s_train_sentence_cbow, 447, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__19)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 447; __pyx_clineno = __LINE__; goto __pyx_L1_error;} /* "gensim/models/word2vec_inner.pyx":536 - * + * * # Score is only implemented for hierarchical softmax * def score_sentence_sg(model, sentence, _work): # <<<<<<<<<<<<<< - * + * * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) */ __pyx_tuple__20 = PyTuple_Pack(20, __pyx_n_s_model, __pyx_n_s_sentence, __pyx_n_s_work, __pyx_n_s_syn0, __pyx_n_s_work_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_sentence_len, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_vlookup, __pyx_n_s_token, __pyx_n_s_word); if (unlikely(!__pyx_tuple__20)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 536; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__20); __Pyx_GIVEREF(__pyx_tuple__20); - __pyx_codeobj__21 = (PyObject*)__Pyx_PyCode_New(3, 0, 20, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__20, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_olavur_RaRe_w2v_batch_sent, __pyx_n_s_score_sentence_sg, 536, 
__pyx_empty_bytes); if (unlikely(!__pyx_codeobj__21)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 536; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_codeobj__21 = (PyObject*)__Pyx_PyCode_New(3, 0, 20, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__20, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Volumes_work_workspace_gensim_t, __pyx_n_s_score_sentence_sg, 536, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__21)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 536; __pyx_clineno = __LINE__; goto __pyx_L1_error;} /* "gensim/models/word2vec_inner.pyx":614 * work[0] += f - * + * * def score_sentence_cbow(model, sentence, _work, _neu1): # <<<<<<<<<<<<<< - * + * * cdef int cbow_mean = model.cbow_mean */ __pyx_tuple__22 = PyTuple_Pack(23, __pyx_n_s_model, __pyx_n_s_sentence, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_cbow_mean, __pyx_n_s_syn0, __pyx_n_s_work_2, __pyx_n_s_neu1_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_sentence_len, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_vlookup, __pyx_n_s_token, __pyx_n_s_word); if (unlikely(!__pyx_tuple__22)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 614; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__22); __Pyx_GIVEREF(__pyx_tuple__22); - __pyx_codeobj__23 = (PyObject*)__Pyx_PyCode_New(4, 0, 23, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__22, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_olavur_RaRe_w2v_batch_sent, __pyx_n_s_score_sentence_cbow, 614, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__23)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 614; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_codeobj__23 = (PyObject*)__Pyx_PyCode_New(4, 0, 23, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__22, __pyx_empty_tuple, __pyx_empty_tuple, 
__pyx_kp_s_Volumes_work_workspace_gensim_t, __pyx_n_s_score_sentence_cbow, 614, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__23)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 614; __pyx_clineno = __LINE__; goto __pyx_L1_error;} /* "gensim/models/word2vec_inner.pyx":709 - * - * + * + * * def init(): # <<<<<<<<<<<<<< * """ * Precompute function `sigmoid(x) = 1 / (1 + exp(-x))`, for x values discretized @@ -11093,7 +10442,7 @@ static int __Pyx_InitCachedConstants(void) { __pyx_tuple__24 = PyTuple_Pack(7, __pyx_n_s_i, __pyx_n_s_x, __pyx_n_s_y, __pyx_n_s_expected, __pyx_n_s_size, __pyx_n_s_d_res, __pyx_n_s_p_res); if (unlikely(!__pyx_tuple__24)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 709; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__24); __Pyx_GIVEREF(__pyx_tuple__24); - __pyx_codeobj__25 = (PyObject*)__Pyx_PyCode_New(0, 0, 7, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__24, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_olavur_RaRe_w2v_batch_sent, __pyx_n_s_init, 709, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__25)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 709; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_codeobj__25 = (PyObject*)__Pyx_PyCode_New(0, 0, 7, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__24, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Volumes_work_workspace_gensim_t, __pyx_n_s_init, 709, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__25)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 709; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_RefNannyFinishContext(); return 0; __pyx_L1_error:; @@ -11106,6 +10455,8 @@ static int __Pyx_InitGlobals(void) { __pyx_int_0 = PyInt_FromLong(0); if (unlikely(!__pyx_int_0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_int_1 = PyInt_FromLong(1); if (unlikely(!__pyx_int_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; 
__pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_int_2 = PyInt_FromLong(2); if (unlikely(!__pyx_int_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_int_1000 = PyInt_FromLong(1000); if (unlikely(!__pyx_int_1000)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_int_100000 = PyInt_FromLong(100000L); if (unlikely(!__pyx_int_100000)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_int_16777216 = PyInt_FromLong(16777216L); if (unlikely(!__pyx_int_16777216)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} return 0; __pyx_L1_error:; @@ -11143,24 +10494,18 @@ PyMODINIT_FUNC PyInit_word2vec_inner(void) } #endif __Pyx_RefNannySetupContext("PyMODINIT_FUNC PyInit_word2vec_inner(void)", 0); - if (__Pyx_check_binary_version() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if ( __Pyx_check_binary_version() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_empty_tuple = PyTuple_New(0); if (unlikely(!__pyx_empty_tuple)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_empty_bytes = PyBytes_FromStringAndSize("", 0); if (unlikely(!__pyx_empty_bytes)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #ifdef __Pyx_CyFunction_USED - if (__pyx_CyFunction_init() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (__Pyx_CyFunction_init() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif #ifdef __Pyx_FusedFunction_USED if (__pyx_FusedFunction_init() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} 
#endif - #ifdef __Pyx_Coroutine_USED - if (__pyx_Coroutine_init() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - #endif #ifdef __Pyx_Generator_USED if (__pyx_Generator_init() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif - #ifdef __Pyx_StopAsyncIteration_USED - if (__pyx_StopAsyncIteration_init() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - #endif /*--- Library function declarations ---*/ /*--- Threads initialization code ---*/ #if defined(__PYX_FORCE_INIT_THREADS) && __PYX_FORCE_INIT_THREADS @@ -11183,12 +10528,12 @@ PyMODINIT_FUNC PyInit_word2vec_inner(void) #endif if (PyObject_SetAttrString(__pyx_m, "__builtins__", __pyx_b) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; /*--- Initialize various global constants etc. ---*/ - if (__Pyx_InitGlobals() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(__Pyx_InitGlobals() < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #if PY_MAJOR_VERSION < 3 && (__PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT) if (__Pyx_init_sys_getdefaultencoding_params() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif if (__pyx_module_is_main_gensim__models__word2vec_inner) { - if (PyObject_SetAttrString(__pyx_m, "__name__", __pyx_n_s_main) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyObject_SetAttrString(__pyx_m, "__name__", __pyx_n_s_main) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; } #if PY_MAJOR_VERSION >= 3 { @@ -11199,9 +10544,9 @@ PyMODINIT_FUNC PyInit_word2vec_inner(void) } #endif 
/*--- Builtin init code ---*/ - if (__Pyx_InitCachedBuiltins() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(__Pyx_InitCachedBuiltins() < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} /*--- Constants init code ---*/ - if (__Pyx_InitCachedConstants() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(__Pyx_InitCachedConstants() < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} /*--- Global init code ---*/ /*--- Variable export code ---*/ if (__Pyx_ExportVoidPtr(__pyx_n_s_scopy, (void *)&__pyx_v_6gensim_6models_14word2vec_inner_scopy, "__pyx_t_6gensim_6models_14word2vec_inner_scopy_ptr") < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -11210,7 +10555,7 @@ PyMODINIT_FUNC PyInit_word2vec_inner(void) if (__Pyx_ExportVoidPtr(__pyx_n_s_dsdot, (void *)&__pyx_v_6gensim_6models_14word2vec_inner_dsdot, "__pyx_t_6gensim_6models_14word2vec_inner_dsdot_ptr") < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__Pyx_ExportVoidPtr(__pyx_n_s_snrm2, (void *)&__pyx_v_6gensim_6models_14word2vec_inner_snrm2, "__pyx_t_6gensim_6models_14word2vec_inner_snrm2_ptr") < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__Pyx_ExportVoidPtr(__pyx_n_s_sscal, (void *)&__pyx_v_6gensim_6models_14word2vec_inner_sscal, "__pyx_t_6gensim_6models_14word2vec_inner_sscal_ptr") < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (__Pyx_ExportVoidPtr(__pyx_n_s_EXP_TABLE, (void *)&__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE, "__pyx_t_6gensim_6models_14word2vec_inner_REAL_t [0x3E8]") < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = 
__LINE__; goto __pyx_L1_error;} + if (__Pyx_ExportVoidPtr(__pyx_n_s_EXP_TABLE, (void *)&__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE, "__pyx_t_6gensim_6models_14word2vec_inner_REAL_t [1000]") < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__Pyx_ExportVoidPtr(__pyx_n_s_our_dot, (void *)&__pyx_v_6gensim_6models_14word2vec_inner_our_dot, "__pyx_t_6gensim_6models_14word2vec_inner_our_dot_ptr") < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__Pyx_ExportVoidPtr(__pyx_n_s_our_saxpy, (void *)&__pyx_v_6gensim_6models_14word2vec_inner_our_saxpy, "__pyx_t_6gensim_6models_14word2vec_inner_our_saxpy_ptr") < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} /*--- Function export code ---*/ @@ -11222,7 +10567,7 @@ PyMODINIT_FUNC PyInit_word2vec_inner(void) if (__Pyx_ExportFunction("random_int32", (void (*)(void))__pyx_f_6gensim_6models_14word2vec_inner_random_int32, "unsigned PY_LONG_LONG (unsigned PY_LONG_LONG *)") < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} /*--- Type init code ---*/ /*--- Type import code ---*/ - __pyx_ptype_7cpython_4type_type = __Pyx_ImportType(__Pyx_BUILTIN_MODULE_NAME, "type", + __pyx_ptype_7cpython_4type_type = __Pyx_ImportType(__Pyx_BUILTIN_MODULE_NAME, "type", #if CYTHON_COMPILING_IN_PYPY sizeof(PyTypeObject), #else @@ -11233,20 +10578,17 @@ PyMODINIT_FUNC PyInit_word2vec_inner(void) __pyx_ptype_5numpy_flatiter = __Pyx_ImportType("numpy", "flatiter", sizeof(PyArrayIterObject), 0); if (unlikely(!__pyx_ptype_5numpy_flatiter)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 168; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_ptype_5numpy_broadcast = __Pyx_ImportType("numpy", "broadcast", sizeof(PyArrayMultiIterObject), 0); if (unlikely(!__pyx_ptype_5numpy_broadcast)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 172; 
__pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_ptype_5numpy_ndarray = __Pyx_ImportType("numpy", "ndarray", sizeof(PyArrayObject), 0); if (unlikely(!__pyx_ptype_5numpy_ndarray)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 181; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_ptype_5numpy_ufunc = __Pyx_ImportType("numpy", "ufunc", sizeof(PyUFuncObject), 0); if (unlikely(!__pyx_ptype_5numpy_ufunc)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 861; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_ptype_5numpy_ufunc = __Pyx_ImportType("numpy", "ufunc", sizeof(PyUFuncObject), 0); if (unlikely(!__pyx_ptype_5numpy_ufunc)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 864; __pyx_clineno = __LINE__; goto __pyx_L1_error;} /*--- Variable import code ---*/ /*--- Function import code ---*/ /*--- Execution code ---*/ - #if defined(__Pyx_Generator_USED) || defined(__Pyx_Coroutine_USED) - if (__Pyx_patch_abc() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - #endif /* "gensim/models/word2vec_inner.pyx":11 - * + * * import cython * import numpy as np # <<<<<<<<<<<<<< * cimport numpy as np - * + * */ __pyx_t_1 = __Pyx_Import(__pyx_n_s_numpy, 0, -1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 11; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); @@ -11254,7 +10596,7 @@ PyMODINIT_FUNC PyInit_word2vec_inner(void) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; /* "gensim/models/word2vec_inner.pyx":19 - * + * * # scipy <= 0.15 * try: # <<<<<<<<<<<<<< * from scipy.linalg.blas import fblas @@ -11287,14 +10629,6 @@ PyMODINIT_FUNC PyInit_word2vec_inner(void) if (PyDict_SetItem(__pyx_d, __pyx_n_s_fblas, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 20; __pyx_clineno = __LINE__; goto __pyx_L2_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - - /* "gensim/models/word2vec_inner.pyx":19 - * - * # scipy <= 0.15 - * try: # 
<<<<<<<<<<<<<< - * from scipy.linalg.blas import fblas - * except ImportError: - */ } __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; @@ -11323,7 +10657,7 @@ PyMODINIT_FUNC PyInit_word2vec_inner(void) * except ImportError: * # in scipy > 0.15, fblas function has been removed * import scipy.linalg.blas as fblas # <<<<<<<<<<<<<< - * + * * REAL = np.float32 */ __pyx_t_8 = PyList_New(1); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 23; __pyx_clineno = __LINE__; goto __pyx_L4_except_error;} @@ -11343,14 +10677,6 @@ PyMODINIT_FUNC PyInit_word2vec_inner(void) } goto __pyx_L4_except_error; __pyx_L4_except_error:; - - /* "gensim/models/word2vec_inner.pyx":19 - * - * # scipy <= 0.15 - * try: # <<<<<<<<<<<<<< - * from scipy.linalg.blas import fblas - * except ImportError: - */ __Pyx_XGIVEREF(__pyx_t_2); __Pyx_XGIVEREF(__pyx_t_3); __Pyx_XGIVEREF(__pyx_t_4); @@ -11366,10 +10692,10 @@ PyMODINIT_FUNC PyInit_word2vec_inner(void) /* "gensim/models/word2vec_inner.pyx":25 * import scipy.linalg.blas as fblas - * + * * REAL = np.float32 # <<<<<<<<<<<<<< - * - * DEF MAX_SENTENCE_LEN = 10000 + * + * DEF MAX_SENTENCE_LEN = 100000 */ __pyx_t_7 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 25; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); @@ -11381,7 +10707,7 @@ PyMODINIT_FUNC PyInit_word2vec_inner(void) /* "gensim/models/word2vec_inner.pyx":30 * DEF MAX_NUM_SENTENCES = 1000 - * + * * cdef scopy_ptr scopy=PyCObject_AsVoidPtr(fblas.scopy._cpointer) # y = x # <<<<<<<<<<<<<< * cdef saxpy_ptr saxpy=PyCObject_AsVoidPtr(fblas.saxpy._cpointer) # y += alpha * x * cdef sdot_ptr sdot=PyCObject_AsVoidPtr(fblas.sdot._cpointer) # float = dot(x, y) @@ -11398,7 +10724,7 @@ PyMODINIT_FUNC PyInit_word2vec_inner(void) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; /* "gensim/models/word2vec_inner.pyx":31 - * + * * cdef scopy_ptr 
scopy=PyCObject_AsVoidPtr(fblas.scopy._cpointer) # y = x * cdef saxpy_ptr saxpy=PyCObject_AsVoidPtr(fblas.saxpy._cpointer) # y += alpha * x # <<<<<<<<<<<<<< * cdef sdot_ptr sdot=PyCObject_AsVoidPtr(fblas.sdot._cpointer) # float = dot(x, y) @@ -11456,7 +10782,7 @@ PyMODINIT_FUNC PyInit_word2vec_inner(void) * cdef dsdot_ptr dsdot=PyCObject_AsVoidPtr(fblas.sdot._cpointer) # double = dot(x, y) * cdef snrm2_ptr snrm2=PyCObject_AsVoidPtr(fblas.snrm2._cpointer) # sqrt(x^2) # <<<<<<<<<<<<<< * cdef sscal_ptr sscal=PyCObject_AsVoidPtr(fblas.sscal._cpointer) # x = alpha * x - * + * */ __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_fblas); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); @@ -11473,7 +10799,7 @@ PyMODINIT_FUNC PyInit_word2vec_inner(void) * cdef dsdot_ptr dsdot=PyCObject_AsVoidPtr(fblas.sdot._cpointer) # double = dot(x, y) * cdef snrm2_ptr snrm2=PyCObject_AsVoidPtr(fblas.snrm2._cpointer) # sqrt(x^2) * cdef sscal_ptr sscal=PyCObject_AsVoidPtr(fblas.sscal._cpointer) # x = alpha * x # <<<<<<<<<<<<<< - * + * * DEF EXP_TABLE_SIZE = 1000 */ __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_fblas); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 35; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -11489,25 +10815,25 @@ PyMODINIT_FUNC PyInit_word2vec_inner(void) /* "gensim/models/word2vec_inner.pyx":43 * cdef REAL_t[EXP_TABLE_SIZE] LOG_TABLE - * + * * cdef int ONE = 1 # <<<<<<<<<<<<<< * cdef REAL_t ONEF = 1.0 - * + * */ __pyx_v_6gensim_6models_14word2vec_inner_ONE = 1; /* "gensim/models/word2vec_inner.pyx":44 - * + * * cdef int ONE = 1 * cdef REAL_t ONEF = 1.0 # <<<<<<<<<<<<<< - * + * * # for when fblas.sdot returns a double */ __pyx_v_6gensim_6models_14word2vec_inner_ONEF = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)1.0); /* "gensim/models/word2vec_inner.pyx":258 - * - * + * + * * def train_sentence_sg(model, sentence, alpha, _work): # 
<<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative @@ -11518,8 +10844,8 @@ PyMODINIT_FUNC PyInit_word2vec_inner(void) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; /* "gensim/models/word2vec_inner.pyx":346 - * - * + * + * * def train_batch_sg(model, sentences, alpha, sentence_indeces, _work): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative @@ -11530,8 +10856,8 @@ PyMODINIT_FUNC PyInit_word2vec_inner(void) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; /* "gensim/models/word2vec_inner.pyx":447 - * - * + * + * * def train_sentence_cbow(model, sentence, alpha, _work, _neu1): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative @@ -11542,10 +10868,10 @@ PyMODINIT_FUNC PyInit_word2vec_inner(void) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; /* "gensim/models/word2vec_inner.pyx":536 - * + * * # Score is only implemented for hierarchical softmax * def score_sentence_sg(model, sentence, _work): # <<<<<<<<<<<<<< - * + * * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) */ __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_14word2vec_inner_7score_sentence_sg, NULL, __pyx_n_s_gensim_models_word2vec_inner); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 536; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -11555,9 +10881,9 @@ PyMODINIT_FUNC PyInit_word2vec_inner(void) /* "gensim/models/word2vec_inner.pyx":614 * work[0] += f - * + * * def score_sentence_cbow(model, sentence, _work, _neu1): # <<<<<<<<<<<<<< - * + * * cdef int cbow_mean = model.cbow_mean */ __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_14word2vec_inner_9score_sentence_cbow, NULL, __pyx_n_s_gensim_models_word2vec_inner); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 614; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -11566,8 +10892,8 @@ PyMODINIT_FUNC PyInit_word2vec_inner(void) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; /* "gensim/models/word2vec_inner.pyx":709 - * - * + * + 
* * def init(): # <<<<<<<<<<<<<< * """ * Precompute function `sigmoid(x) = 1 / (1 + exp(-x))`, for x values discretized @@ -11579,8 +10905,10 @@ PyMODINIT_FUNC PyInit_word2vec_inner(void) /* "gensim/models/word2vec_inner.pyx":750 * return 2 - * + * * FAST_VERSION = init() # initialize the module # <<<<<<<<<<<<<< + * MAX_WORDS_IN_BATCH = MAX_SENTENCE_LEN + * MAX_BATCH_SENTENCES = MAX_NUM_SENTENCES */ __pyx_t_7 = __Pyx_GetModuleGlobalName(__pyx_n_s_init); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 750; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); @@ -11605,6 +10933,21 @@ PyMODINIT_FUNC PyInit_word2vec_inner(void) if (PyDict_SetItem(__pyx_d, __pyx_n_s_FAST_VERSION, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 750; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + /* "gensim/models/word2vec_inner.pyx":751 + * + * FAST_VERSION = init() # initialize the module + * MAX_WORDS_IN_BATCH = MAX_SENTENCE_LEN # <<<<<<<<<<<<<< + * MAX_BATCH_SENTENCES = MAX_NUM_SENTENCES + */ + if (PyDict_SetItem(__pyx_d, __pyx_n_s_MAX_WORDS_IN_BATCH, __pyx_int_100000) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 751; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + + /* "gensim/models/word2vec_inner.pyx":752 + * FAST_VERSION = init() # initialize the module + * MAX_WORDS_IN_BATCH = MAX_SENTENCE_LEN + * MAX_BATCH_SENTENCES = MAX_NUM_SENTENCES # <<<<<<<<<<<<<< + */ + if (PyDict_SetItem(__pyx_d, __pyx_n_s_MAX_BATCH_SENTENCES, __pyx_int_1000) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 752; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + /* "gensim/models/word2vec_inner.pyx":1 * #!/usr/bin/env cython # <<<<<<<<<<<<<< * # cython: boundscheck=False @@ -11615,9 +10958,9 @@ PyMODINIT_FUNC PyInit_word2vec_inner(void) if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} 
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "../../../../../.virtualenvs/RaRe_w2v_batch_sentences/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":976 + /* "../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":979 * arr.base = baseptr - * + * * cdef inline object get_array_base(ndarray arr): # <<<<<<<<<<<<<< * if arr.base is NULL: * return None @@ -11849,111 +11192,10 @@ static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg } #endif -#if CYTHON_USE_PYLONG_INTERNALS - #include "longintrepr.h" -#endif - -#if CYTHON_COMPILING_IN_CPYTHON -static PyObject* __Pyx_PyInt_AddObjC(PyObject *op1, PyObject *op2, CYTHON_UNUSED long intval, CYTHON_UNUSED int inplace) { - #if PY_MAJOR_VERSION < 3 - if (likely(PyInt_CheckExact(op1))) { - const long b = intval; - long x; - long a = PyInt_AS_LONG(op1); - x = (long)((unsigned long)a + b); - if (likely((x^a) >= 0 || (x^b) >= 0)) - return PyInt_FromLong(x); - return PyLong_Type.tp_as_number->nb_add(op1, op2); - } - #endif - #if CYTHON_USE_PYLONG_INTERNALS && PY_MAJOR_VERSION >= 3 - if (likely(PyLong_CheckExact(op1))) { - const long b = intval; - long a, x; - const PY_LONG_LONG llb = intval; - PY_LONG_LONG lla, llx; - const digit* digits = ((PyLongObject*)op1)->ob_digit; - const Py_ssize_t size = Py_SIZE(op1); - if (likely(__Pyx_sst_abs(size) <= 1)) { - a = likely(size) ? 
digits[0] : 0; - if (size == -1) a = -a; - } else { - switch (size) { - case -2: - if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) { - a = -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | digits[0])); - break; - } else if (8 * sizeof(PY_LONG_LONG) - 1 > 2 * PyLong_SHIFT) { - lla = -(PY_LONG_LONG) (((((unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | digits[0])); - goto long_long; - } - case 2: - if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) { - a = (long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | digits[0])); - break; - } else if (8 * sizeof(PY_LONG_LONG) - 1 > 2 * PyLong_SHIFT) { - lla = (PY_LONG_LONG) (((((unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | digits[0])); - goto long_long; - } - case -3: - if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) { - a = -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0])); - break; - } else if (8 * sizeof(PY_LONG_LONG) - 1 > 3 * PyLong_SHIFT) { - lla = -(PY_LONG_LONG) (((((((unsigned PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0])); - goto long_long; - } - case 3: - if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) { - a = (long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0])); - break; - } else if (8 * sizeof(PY_LONG_LONG) - 1 > 3 * PyLong_SHIFT) { - lla = (PY_LONG_LONG) (((((((unsigned PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0])); - goto long_long; - } - case -4: - if (8 * sizeof(long) - 1 > 4 * PyLong_SHIFT) { - a = -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0])); - break; - } else if (8 * sizeof(PY_LONG_LONG) - 1 > 4 * PyLong_SHIFT) { - lla = -(PY_LONG_LONG) (((((((((unsigned PY_LONG_LONG)digits[3]) << PyLong_SHIFT) | digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0])); - goto long_long; - } - case 4: - if (8 * sizeof(long) - 1 > 4 * 
PyLong_SHIFT) { - a = (long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0])); - break; - } else if (8 * sizeof(PY_LONG_LONG) - 1 > 4 * PyLong_SHIFT) { - lla = (PY_LONG_LONG) (((((((((unsigned PY_LONG_LONG)digits[3]) << PyLong_SHIFT) | digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0])); - goto long_long; - } - default: return PyLong_Type.tp_as_number->nb_add(op1, op2); - } - } - x = a + b; - return PyLong_FromLong(x); - long_long: - llx = lla + llb; - return PyLong_FromLongLong(llx); - } - #endif - if (PyFloat_CheckExact(op1)) { - const long b = intval; - double a = PyFloat_AS_DOUBLE(op1); - double result; - PyFPE_START_PROTECT("add", return NULL) - result = ((double)a) + (double)b; - PyFPE_END_PROTECT(result) - return PyFloat_FromDouble(result); - } - return (inplace ? PyNumber_InPlaceAdd : PyNumber_Add)(op1, op2); -} -#endif - -static CYTHON_INLINE PyObject* __Pyx_PyObject_GetSlice(PyObject* obj, - Py_ssize_t cstart, Py_ssize_t cstop, - PyObject** _py_start, PyObject** _py_stop, PyObject** _py_slice, - int has_cstart, int has_cstop, CYTHON_UNUSED int wraparound) { +static CYTHON_INLINE PyObject* __Pyx_PyObject_GetSlice(PyObject* obj, + Py_ssize_t cstart, Py_ssize_t cstop, + PyObject** _py_start, PyObject** _py_stop, PyObject** _py_slice, + int has_cstart, int has_cstop, CYTHON_UNUSED int wraparound) { #if CYTHON_COMPILING_IN_CPYTHON PyMappingMethods* mp; #if PY_MAJOR_VERSION < 3 @@ -12253,79 +11495,6 @@ static CYTHON_INLINE void __Pyx_RaiseNoneNotIterableError(void) { PyErr_SetString(PyExc_TypeError, "'NoneType' object is not iterable"); } -static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level) { - PyObject *empty_list = 0; - PyObject *module = 0; - PyObject *global_dict = 0; - PyObject *empty_dict = 0; - PyObject *list; - #if PY_VERSION_HEX < 0x03030000 - PyObject *py_import; - py_import = __Pyx_PyObject_GetAttrStr(__pyx_b, __pyx_n_s_import); - 
if (!py_import) - goto bad; - #endif - if (from_list) - list = from_list; - else { - empty_list = PyList_New(0); - if (!empty_list) - goto bad; - list = empty_list; - } - global_dict = PyModule_GetDict(__pyx_m); - if (!global_dict) - goto bad; - empty_dict = PyDict_New(); - if (!empty_dict) - goto bad; - { - #if PY_MAJOR_VERSION >= 3 - if (level == -1) { - if (strchr(__Pyx_MODULE_NAME, '.')) { - #if PY_VERSION_HEX < 0x03030000 - PyObject *py_level = PyInt_FromLong(1); - if (!py_level) - goto bad; - module = PyObject_CallFunctionObjArgs(py_import, - name, global_dict, empty_dict, list, py_level, NULL); - Py_DECREF(py_level); - #else - module = PyImport_ImportModuleLevelObject( - name, global_dict, empty_dict, list, 1); - #endif - if (!module) { - if (!PyErr_ExceptionMatches(PyExc_ImportError)) - goto bad; - PyErr_Clear(); - } - } - level = 0; - } - #endif - if (!module) { - #if PY_VERSION_HEX < 0x03030000 - PyObject *py_level = PyInt_FromLong(level); - if (!py_level) - goto bad; - module = PyObject_CallFunctionObjArgs(py_import, - name, global_dict, empty_dict, list, py_level, NULL); - Py_DECREF(py_level); - #else - module = PyImport_ImportModuleLevelObject( - name, global_dict, empty_dict, list, level); - #endif - } - } -bad: - #if PY_VERSION_HEX < 0x03030000 - Py_XDECREF(py_import); - #endif - Py_XDECREF(empty_list); - Py_XDECREF(empty_dict); - return module; -} - static PyObject* __Pyx_ImportFrom(PyObject* module, PyObject* name) { PyObject* value = __Pyx_PyObject_GetAttrStr(module, name); if (unlikely(!value) && PyErr_ExceptionMatches(PyExc_AttributeError)) { @@ -12514,7 +11683,7 @@ static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int co return count; } while (start < end) { - mid = start + (end - start) / 2; + mid = (start + end) / 2; if (code_line < entries[mid].code_line) { end = mid; } else if (code_line > entries[mid].code_line) { @@ -12667,8 +11836,81 @@ static void __Pyx_AddTraceback(const char *funcname, int c_line, 
Py_XDECREF(py_frame); } +static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level) { + PyObject *empty_list = 0; + PyObject *module = 0; + PyObject *global_dict = 0; + PyObject *empty_dict = 0; + PyObject *list; + #if PY_VERSION_HEX < 0x03030000 + PyObject *py_import; + py_import = __Pyx_PyObject_GetAttrStr(__pyx_b, __pyx_n_s_import); + if (!py_import) + goto bad; + #endif + if (from_list) + list = from_list; + else { + empty_list = PyList_New(0); + if (!empty_list) + goto bad; + list = empty_list; + } + global_dict = PyModule_GetDict(__pyx_m); + if (!global_dict) + goto bad; + empty_dict = PyDict_New(); + if (!empty_dict) + goto bad; + { + #if PY_MAJOR_VERSION >= 3 + if (level == -1) { + if (strchr(__Pyx_MODULE_NAME, '.')) { + #if PY_VERSION_HEX < 0x03030000 + PyObject *py_level = PyInt_FromLong(1); + if (!py_level) + goto bad; + module = PyObject_CallFunctionObjArgs(py_import, + name, global_dict, empty_dict, list, py_level, NULL); + Py_DECREF(py_level); + #else + module = PyImport_ImportModuleLevelObject( + name, global_dict, empty_dict, list, 1); + #endif + if (!module) { + if (!PyErr_ExceptionMatches(PyExc_ImportError)) + goto bad; + PyErr_Clear(); + } + } + level = 0; + } + #endif + if (!module) { + #if PY_VERSION_HEX < 0x03030000 + PyObject *py_level = PyInt_FromLong(level); + if (!py_level) + goto bad; + module = PyObject_CallFunctionObjArgs(py_import, + name, global_dict, empty_dict, list, py_level, NULL); + Py_DECREF(py_level); + #else + module = PyImport_ImportModuleLevelObject( + name, global_dict, empty_dict, list, level); + #endif + } + } +bad: + #if PY_VERSION_HEX < 0x03030000 + Py_XDECREF(py_import); + #endif + Py_XDECREF(empty_list); + Py_XDECREF(empty_dict); + return module; +} + static CYTHON_INLINE PyObject* __Pyx_PyInt_From_int(int value) { - const int neg_one = (int) -1, const_zero = (int) 0; + const int neg_one = (int) -1, const_zero = 0; const int is_unsigned = neg_one > const_zero; if (is_unsigned) { if (sizeof(int) < 
sizeof(long)) { @@ -12693,29 +11935,29 @@ static CYTHON_INLINE PyObject* __Pyx_PyInt_From_int(int value) { } } -#define __PYX_VERIFY_RETURN_INT(target_type, func_type, func_value)\ - __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, 0) -#define __PYX_VERIFY_RETURN_INT_EXC(target_type, func_type, func_value)\ - __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, 1) -#define __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, exc)\ - {\ - func_type value = func_value;\ - if (sizeof(target_type) < sizeof(func_type)) {\ - if (unlikely(value != (func_type) (target_type) value)) {\ - func_type zero = 0;\ - if (exc && unlikely(value == (func_type)-1 && PyErr_Occurred()))\ - return (target_type) -1;\ - if (is_unsigned && unlikely(value < zero))\ - goto raise_neg_overflow;\ - else\ - goto raise_overflow;\ - }\ - }\ - return (target_type) value;\ - } +#define __PYX_VERIFY_RETURN_INT(target_type, func_type, func_value) \ + { \ + func_type value = func_value; \ + if (sizeof(target_type) < sizeof(func_type)) { \ + if (unlikely(value != (func_type) (target_type) value)) { \ + func_type zero = 0; \ + if (is_unsigned && unlikely(value < zero)) \ + goto raise_neg_overflow; \ + else \ + goto raise_overflow; \ + } \ + } \ + return (target_type) value; \ + } + +#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 + #if CYTHON_USE_PYLONG_INTERNALS + #include "longintrepr.h" + #endif +#endif static CYTHON_INLINE PY_LONG_LONG __Pyx_PyInt_As_PY_LONG_LONG(PyObject *x) { - const PY_LONG_LONG neg_one = (PY_LONG_LONG) -1, const_zero = (PY_LONG_LONG) 0; + const PY_LONG_LONG neg_one = (PY_LONG_LONG) -1, const_zero = 0; const int is_unsigned = neg_one > const_zero; #if PY_MAJOR_VERSION < 3 if (likely(PyInt_Check(x))) { @@ -12732,39 +11974,13 @@ static CYTHON_INLINE PY_LONG_LONG __Pyx_PyInt_As_PY_LONG_LONG(PyObject *x) { #endif if (likely(PyLong_Check(x))) { if (is_unsigned) { -#if CYTHON_USE_PYLONG_INTERNALS - const digit* digits = ((PyLongObject*)x)->ob_digit; 
+#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 + #if CYTHON_USE_PYLONG_INTERNALS switch (Py_SIZE(x)) { - case 0: return (PY_LONG_LONG) 0; - case 1: __PYX_VERIFY_RETURN_INT(PY_LONG_LONG, digit, digits[0]) - case 2: - if (8 * sizeof(PY_LONG_LONG) > 1 * PyLong_SHIFT) { - if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { - __PYX_VERIFY_RETURN_INT(PY_LONG_LONG, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | digits[0]))) - } else if (8 * sizeof(PY_LONG_LONG) >= 2 * PyLong_SHIFT) { - return (PY_LONG_LONG) (((((PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | digits[0])); - } - } - break; - case 3: - if (8 * sizeof(PY_LONG_LONG) > 2 * PyLong_SHIFT) { - if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { - __PYX_VERIFY_RETURN_INT(PY_LONG_LONG, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0]))) - } else if (8 * sizeof(PY_LONG_LONG) >= 3 * PyLong_SHIFT) { - return (PY_LONG_LONG) (((((((PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0])); - } - } - break; - case 4: - if (8 * sizeof(PY_LONG_LONG) > 3 * PyLong_SHIFT) { - if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { - __PYX_VERIFY_RETURN_INT(PY_LONG_LONG, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0]))) - } else if (8 * sizeof(PY_LONG_LONG) >= 4 * PyLong_SHIFT) { - return (PY_LONG_LONG) (((((((((PY_LONG_LONG)digits[3]) << PyLong_SHIFT) | digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0])); - } - } - break; + case 0: return 0; + case 1: __PYX_VERIFY_RETURN_INT(PY_LONG_LONG, digit, ((PyLongObject*)x)->ob_digit[0]); } + #endif #endif #if CYTHON_COMPILING_IN_CPYTHON if (unlikely(Py_SIZE(x) < 0)) { @@ -12780,77 +11996,24 @@ static CYTHON_INLINE PY_LONG_LONG __Pyx_PyInt_As_PY_LONG_LONG(PyObject *x) { } #endif if (sizeof(PY_LONG_LONG) <= sizeof(unsigned long)) { - __PYX_VERIFY_RETURN_INT_EXC(PY_LONG_LONG, 
unsigned long, PyLong_AsUnsignedLong(x)) + __PYX_VERIFY_RETURN_INT(PY_LONG_LONG, unsigned long, PyLong_AsUnsignedLong(x)) } else if (sizeof(PY_LONG_LONG) <= sizeof(unsigned PY_LONG_LONG)) { - __PYX_VERIFY_RETURN_INT_EXC(PY_LONG_LONG, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) + __PYX_VERIFY_RETURN_INT(PY_LONG_LONG, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) } } else { -#if CYTHON_USE_PYLONG_INTERNALS - const digit* digits = ((PyLongObject*)x)->ob_digit; +#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 + #if CYTHON_USE_PYLONG_INTERNALS switch (Py_SIZE(x)) { - case 0: return (PY_LONG_LONG) 0; - case -1: __PYX_VERIFY_RETURN_INT(PY_LONG_LONG, sdigit, -(sdigit) digits[0]) - case 1: __PYX_VERIFY_RETURN_INT(PY_LONG_LONG, digit, +digits[0]) - case -2: - if (8 * sizeof(PY_LONG_LONG) - 1 > 1 * PyLong_SHIFT) { - if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { - __PYX_VERIFY_RETURN_INT(PY_LONG_LONG, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | digits[0]))) - } else if (8 * sizeof(PY_LONG_LONG) - 1 > 2 * PyLong_SHIFT) { - return (PY_LONG_LONG) -(((((PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | digits[0])); - } - } - break; - case 2: - if (8 * sizeof(PY_LONG_LONG) > 1 * PyLong_SHIFT) { - if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { - __PYX_VERIFY_RETURN_INT(PY_LONG_LONG, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | digits[0]))) - } else if (8 * sizeof(PY_LONG_LONG) - 1 > 2 * PyLong_SHIFT) { - return (PY_LONG_LONG) (((((PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | digits[0])); - } - } - break; - case -3: - if (8 * sizeof(PY_LONG_LONG) - 1 > 2 * PyLong_SHIFT) { - if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { - __PYX_VERIFY_RETURN_INT(PY_LONG_LONG, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0]))) - } else if (8 * sizeof(PY_LONG_LONG) - 1 > 3 * PyLong_SHIFT) { - return (PY_LONG_LONG) -(((((((PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | digits[1]) << 
PyLong_SHIFT) | digits[0])); - } - } - break; - case 3: - if (8 * sizeof(PY_LONG_LONG) > 2 * PyLong_SHIFT) { - if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { - __PYX_VERIFY_RETURN_INT(PY_LONG_LONG, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0]))) - } else if (8 * sizeof(PY_LONG_LONG) - 1 > 3 * PyLong_SHIFT) { - return (PY_LONG_LONG) (((((((PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0])); - } - } - break; - case -4: - if (8 * sizeof(PY_LONG_LONG) - 1 > 3 * PyLong_SHIFT) { - if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { - __PYX_VERIFY_RETURN_INT(PY_LONG_LONG, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0]))) - } else if (8 * sizeof(PY_LONG_LONG) - 1 > 4 * PyLong_SHIFT) { - return (PY_LONG_LONG) -(((((((((PY_LONG_LONG)digits[3]) << PyLong_SHIFT) | digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0])); - } - } - break; - case 4: - if (8 * sizeof(PY_LONG_LONG) > 3 * PyLong_SHIFT) { - if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { - __PYX_VERIFY_RETURN_INT(PY_LONG_LONG, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0]))) - } else if (8 * sizeof(PY_LONG_LONG) - 1 > 4 * PyLong_SHIFT) { - return (PY_LONG_LONG) (((((((((PY_LONG_LONG)digits[3]) << PyLong_SHIFT) | digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0])); - } - } - break; + case 0: return 0; + case 1: __PYX_VERIFY_RETURN_INT(PY_LONG_LONG, digit, +(((PyLongObject*)x)->ob_digit[0])); + case -1: __PYX_VERIFY_RETURN_INT(PY_LONG_LONG, sdigit, -(sdigit) ((PyLongObject*)x)->ob_digit[0]); } + #endif #endif if (sizeof(PY_LONG_LONG) <= sizeof(long)) { - __PYX_VERIFY_RETURN_INT_EXC(PY_LONG_LONG, long, PyLong_AsLong(x)) + __PYX_VERIFY_RETURN_INT(PY_LONG_LONG, long, PyLong_AsLong(x)) } else if 
(sizeof(PY_LONG_LONG) <= sizeof(PY_LONG_LONG)) { - __PYX_VERIFY_RETURN_INT_EXC(PY_LONG_LONG, PY_LONG_LONG, PyLong_AsLongLong(x)) + __PYX_VERIFY_RETURN_INT(PY_LONG_LONG, PY_LONG_LONG, PyLong_AsLongLong(x)) } } { @@ -12899,7 +12062,7 @@ static CYTHON_INLINE PY_LONG_LONG __Pyx_PyInt_As_PY_LONG_LONG(PyObject *x) { } static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value) { - const long neg_one = (long) -1, const_zero = (long) 0; + const long neg_one = (long) -1, const_zero = 0; const int is_unsigned = neg_one > const_zero; if (is_unsigned) { if (sizeof(long) < sizeof(long)) { @@ -12925,7 +12088,7 @@ static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value) { } static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *x) { - const int neg_one = (int) -1, const_zero = (int) 0; + const int neg_one = (int) -1, const_zero = 0; const int is_unsigned = neg_one > const_zero; #if PY_MAJOR_VERSION < 3 if (likely(PyInt_Check(x))) { @@ -12942,39 +12105,13 @@ static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *x) { #endif if (likely(PyLong_Check(x))) { if (is_unsigned) { -#if CYTHON_USE_PYLONG_INTERNALS - const digit* digits = ((PyLongObject*)x)->ob_digit; +#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 + #if CYTHON_USE_PYLONG_INTERNALS switch (Py_SIZE(x)) { - case 0: return (int) 0; - case 1: __PYX_VERIFY_RETURN_INT(int, digit, digits[0]) - case 2: - if (8 * sizeof(int) > 1 * PyLong_SHIFT) { - if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { - __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | digits[0]))) - } else if (8 * sizeof(int) >= 2 * PyLong_SHIFT) { - return (int) (((((int)digits[1]) << PyLong_SHIFT) | digits[0])); - } - } - break; - case 3: - if (8 * sizeof(int) > 2 * PyLong_SHIFT) { - if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { - __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0]))) - } else if (8 * sizeof(int) 
>= 3 * PyLong_SHIFT) { - return (int) (((((((int)digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0])); - } - } - break; - case 4: - if (8 * sizeof(int) > 3 * PyLong_SHIFT) { - if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { - __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0]))) - } else if (8 * sizeof(int) >= 4 * PyLong_SHIFT) { - return (int) (((((((((int)digits[3]) << PyLong_SHIFT) | digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0])); - } - } - break; + case 0: return 0; + case 1: __PYX_VERIFY_RETURN_INT(int, digit, ((PyLongObject*)x)->ob_digit[0]); } + #endif #endif #if CYTHON_COMPILING_IN_CPYTHON if (unlikely(Py_SIZE(x) < 0)) { @@ -12990,77 +12127,24 @@ static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *x) { } #endif if (sizeof(int) <= sizeof(unsigned long)) { - __PYX_VERIFY_RETURN_INT_EXC(int, unsigned long, PyLong_AsUnsignedLong(x)) + __PYX_VERIFY_RETURN_INT(int, unsigned long, PyLong_AsUnsignedLong(x)) } else if (sizeof(int) <= sizeof(unsigned PY_LONG_LONG)) { - __PYX_VERIFY_RETURN_INT_EXC(int, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) + __PYX_VERIFY_RETURN_INT(int, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) } } else { -#if CYTHON_USE_PYLONG_INTERNALS - const digit* digits = ((PyLongObject*)x)->ob_digit; +#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 + #if CYTHON_USE_PYLONG_INTERNALS switch (Py_SIZE(x)) { - case 0: return (int) 0; - case -1: __PYX_VERIFY_RETURN_INT(int, sdigit, -(sdigit) digits[0]) - case 1: __PYX_VERIFY_RETURN_INT(int, digit, +digits[0]) - case -2: - if (8 * sizeof(int) - 1 > 1 * PyLong_SHIFT) { - if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { - __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | digits[0]))) - } else if (8 * sizeof(int) - 1 > 2 * PyLong_SHIFT) { - return (int) -(((((int)digits[1]) << 
PyLong_SHIFT) | digits[0])); - } - } - break; - case 2: - if (8 * sizeof(int) > 1 * PyLong_SHIFT) { - if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { - __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | digits[0]))) - } else if (8 * sizeof(int) - 1 > 2 * PyLong_SHIFT) { - return (int) (((((int)digits[1]) << PyLong_SHIFT) | digits[0])); - } - } - break; - case -3: - if (8 * sizeof(int) - 1 > 2 * PyLong_SHIFT) { - if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { - __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0]))) - } else if (8 * sizeof(int) - 1 > 3 * PyLong_SHIFT) { - return (int) -(((((((int)digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0])); - } - } - break; - case 3: - if (8 * sizeof(int) > 2 * PyLong_SHIFT) { - if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { - __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0]))) - } else if (8 * sizeof(int) - 1 > 3 * PyLong_SHIFT) { - return (int) (((((((int)digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0])); - } - } - break; - case -4: - if (8 * sizeof(int) - 1 > 3 * PyLong_SHIFT) { - if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { - __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0]))) - } else if (8 * sizeof(int) - 1 > 4 * PyLong_SHIFT) { - return (int) -(((((((((int)digits[3]) << PyLong_SHIFT) | digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0])); - } - } - break; - case 4: - if (8 * sizeof(int) > 3 * PyLong_SHIFT) { - if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { - __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | 
digits[0]))) - } else if (8 * sizeof(int) - 1 > 4 * PyLong_SHIFT) { - return (int) (((((((((int)digits[3]) << PyLong_SHIFT) | digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0])); - } - } - break; + case 0: return 0; + case 1: __PYX_VERIFY_RETURN_INT(int, digit, +(((PyLongObject*)x)->ob_digit[0])); + case -1: __PYX_VERIFY_RETURN_INT(int, sdigit, -(sdigit) ((PyLongObject*)x)->ob_digit[0]); } + #endif #endif if (sizeof(int) <= sizeof(long)) { - __PYX_VERIFY_RETURN_INT_EXC(int, long, PyLong_AsLong(x)) + __PYX_VERIFY_RETURN_INT(int, long, PyLong_AsLong(x)) } else if (sizeof(int) <= sizeof(PY_LONG_LONG)) { - __PYX_VERIFY_RETURN_INT_EXC(int, PY_LONG_LONG, PyLong_AsLongLong(x)) + __PYX_VERIFY_RETURN_INT(int, PY_LONG_LONG, PyLong_AsLongLong(x)) } } { @@ -13109,7 +12193,7 @@ static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *x) { } static CYTHON_INLINE unsigned PY_LONG_LONG __Pyx_PyInt_As_unsigned_PY_LONG_LONG(PyObject *x) { - const unsigned PY_LONG_LONG neg_one = (unsigned PY_LONG_LONG) -1, const_zero = (unsigned PY_LONG_LONG) 0; + const unsigned PY_LONG_LONG neg_one = (unsigned PY_LONG_LONG) -1, const_zero = 0; const int is_unsigned = neg_one > const_zero; #if PY_MAJOR_VERSION < 3 if (likely(PyInt_Check(x))) { @@ -13126,39 +12210,13 @@ static CYTHON_INLINE unsigned PY_LONG_LONG __Pyx_PyInt_As_unsigned_PY_LONG_LONG( #endif if (likely(PyLong_Check(x))) { if (is_unsigned) { -#if CYTHON_USE_PYLONG_INTERNALS - const digit* digits = ((PyLongObject*)x)->ob_digit; +#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 + #if CYTHON_USE_PYLONG_INTERNALS switch (Py_SIZE(x)) { - case 0: return (unsigned PY_LONG_LONG) 0; - case 1: __PYX_VERIFY_RETURN_INT(unsigned PY_LONG_LONG, digit, digits[0]) - case 2: - if (8 * sizeof(unsigned PY_LONG_LONG) > 1 * PyLong_SHIFT) { - if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { - __PYX_VERIFY_RETURN_INT(unsigned PY_LONG_LONG, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | digits[0]))) - } else if (8 * 
sizeof(unsigned PY_LONG_LONG) >= 2 * PyLong_SHIFT) { - return (unsigned PY_LONG_LONG) (((((unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | digits[0])); - } - } - break; - case 3: - if (8 * sizeof(unsigned PY_LONG_LONG) > 2 * PyLong_SHIFT) { - if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { - __PYX_VERIFY_RETURN_INT(unsigned PY_LONG_LONG, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0]))) - } else if (8 * sizeof(unsigned PY_LONG_LONG) >= 3 * PyLong_SHIFT) { - return (unsigned PY_LONG_LONG) (((((((unsigned PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0])); - } - } - break; - case 4: - if (8 * sizeof(unsigned PY_LONG_LONG) > 3 * PyLong_SHIFT) { - if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { - __PYX_VERIFY_RETURN_INT(unsigned PY_LONG_LONG, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0]))) - } else if (8 * sizeof(unsigned PY_LONG_LONG) >= 4 * PyLong_SHIFT) { - return (unsigned PY_LONG_LONG) (((((((((unsigned PY_LONG_LONG)digits[3]) << PyLong_SHIFT) | digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0])); - } - } - break; + case 0: return 0; + case 1: __PYX_VERIFY_RETURN_INT(unsigned PY_LONG_LONG, digit, ((PyLongObject*)x)->ob_digit[0]); } + #endif #endif #if CYTHON_COMPILING_IN_CPYTHON if (unlikely(Py_SIZE(x) < 0)) { @@ -13174,77 +12232,24 @@ static CYTHON_INLINE unsigned PY_LONG_LONG __Pyx_PyInt_As_unsigned_PY_LONG_LONG( } #endif if (sizeof(unsigned PY_LONG_LONG) <= sizeof(unsigned long)) { - __PYX_VERIFY_RETURN_INT_EXC(unsigned PY_LONG_LONG, unsigned long, PyLong_AsUnsignedLong(x)) + __PYX_VERIFY_RETURN_INT(unsigned PY_LONG_LONG, unsigned long, PyLong_AsUnsignedLong(x)) } else if (sizeof(unsigned PY_LONG_LONG) <= sizeof(unsigned PY_LONG_LONG)) { - __PYX_VERIFY_RETURN_INT_EXC(unsigned PY_LONG_LONG, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) + 
__PYX_VERIFY_RETURN_INT(unsigned PY_LONG_LONG, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) } } else { -#if CYTHON_USE_PYLONG_INTERNALS - const digit* digits = ((PyLongObject*)x)->ob_digit; +#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 + #if CYTHON_USE_PYLONG_INTERNALS switch (Py_SIZE(x)) { - case 0: return (unsigned PY_LONG_LONG) 0; - case -1: __PYX_VERIFY_RETURN_INT(unsigned PY_LONG_LONG, sdigit, -(sdigit) digits[0]) - case 1: __PYX_VERIFY_RETURN_INT(unsigned PY_LONG_LONG, digit, +digits[0]) - case -2: - if (8 * sizeof(unsigned PY_LONG_LONG) - 1 > 1 * PyLong_SHIFT) { - if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { - __PYX_VERIFY_RETURN_INT(unsigned PY_LONG_LONG, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | digits[0]))) - } else if (8 * sizeof(unsigned PY_LONG_LONG) - 1 > 2 * PyLong_SHIFT) { - return (unsigned PY_LONG_LONG) -(((((unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | digits[0])); - } - } - break; - case 2: - if (8 * sizeof(unsigned PY_LONG_LONG) > 1 * PyLong_SHIFT) { - if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { - __PYX_VERIFY_RETURN_INT(unsigned PY_LONG_LONG, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | digits[0]))) - } else if (8 * sizeof(unsigned PY_LONG_LONG) - 1 > 2 * PyLong_SHIFT) { - return (unsigned PY_LONG_LONG) (((((unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | digits[0])); - } - } - break; - case -3: - if (8 * sizeof(unsigned PY_LONG_LONG) - 1 > 2 * PyLong_SHIFT) { - if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { - __PYX_VERIFY_RETURN_INT(unsigned PY_LONG_LONG, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0]))) - } else if (8 * sizeof(unsigned PY_LONG_LONG) - 1 > 3 * PyLong_SHIFT) { - return (unsigned PY_LONG_LONG) -(((((((unsigned PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0])); - } - } - break; - case 3: - if (8 * sizeof(unsigned PY_LONG_LONG) > 2 * PyLong_SHIFT) { 
- if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { - __PYX_VERIFY_RETURN_INT(unsigned PY_LONG_LONG, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0]))) - } else if (8 * sizeof(unsigned PY_LONG_LONG) - 1 > 3 * PyLong_SHIFT) { - return (unsigned PY_LONG_LONG) (((((((unsigned PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0])); - } - } - break; - case -4: - if (8 * sizeof(unsigned PY_LONG_LONG) - 1 > 3 * PyLong_SHIFT) { - if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { - __PYX_VERIFY_RETURN_INT(unsigned PY_LONG_LONG, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0]))) - } else if (8 * sizeof(unsigned PY_LONG_LONG) - 1 > 4 * PyLong_SHIFT) { - return (unsigned PY_LONG_LONG) -(((((((((unsigned PY_LONG_LONG)digits[3]) << PyLong_SHIFT) | digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0])); - } - } - break; - case 4: - if (8 * sizeof(unsigned PY_LONG_LONG) > 3 * PyLong_SHIFT) { - if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { - __PYX_VERIFY_RETURN_INT(unsigned PY_LONG_LONG, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0]))) - } else if (8 * sizeof(unsigned PY_LONG_LONG) - 1 > 4 * PyLong_SHIFT) { - return (unsigned PY_LONG_LONG) (((((((((unsigned PY_LONG_LONG)digits[3]) << PyLong_SHIFT) | digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0])); - } - } - break; + case 0: return 0; + case 1: __PYX_VERIFY_RETURN_INT(unsigned PY_LONG_LONG, digit, +(((PyLongObject*)x)->ob_digit[0])); + case -1: __PYX_VERIFY_RETURN_INT(unsigned PY_LONG_LONG, sdigit, -(sdigit) ((PyLongObject*)x)->ob_digit[0]); } + #endif #endif if (sizeof(unsigned PY_LONG_LONG) <= sizeof(long)) { - __PYX_VERIFY_RETURN_INT_EXC(unsigned PY_LONG_LONG, long, PyLong_AsLong(x)) + 
__PYX_VERIFY_RETURN_INT(unsigned PY_LONG_LONG, long, PyLong_AsLong(x)) } else if (sizeof(unsigned PY_LONG_LONG) <= sizeof(PY_LONG_LONG)) { - __PYX_VERIFY_RETURN_INT_EXC(unsigned PY_LONG_LONG, PY_LONG_LONG, PyLong_AsLongLong(x)) + __PYX_VERIFY_RETURN_INT(unsigned PY_LONG_LONG, PY_LONG_LONG, PyLong_AsLongLong(x)) } } { @@ -13293,7 +12298,7 @@ static CYTHON_INLINE unsigned PY_LONG_LONG __Pyx_PyInt_As_unsigned_PY_LONG_LONG( } static CYTHON_INLINE PyObject* __Pyx_PyInt_From_unsigned_PY_LONG_LONG(unsigned PY_LONG_LONG value) { - const unsigned PY_LONG_LONG neg_one = (unsigned PY_LONG_LONG) -1, const_zero = (unsigned PY_LONG_LONG) 0; + const unsigned PY_LONG_LONG neg_one = (unsigned PY_LONG_LONG) -1, const_zero = 0; const int is_unsigned = neg_one > const_zero; if (is_unsigned) { if (sizeof(unsigned PY_LONG_LONG) < sizeof(long)) { @@ -13319,7 +12324,7 @@ static CYTHON_INLINE PyObject* __Pyx_PyInt_From_unsigned_PY_LONG_LONG(unsigned P } static CYTHON_INLINE npy_uint32 __Pyx_PyInt_As_npy_uint32(PyObject *x) { - const npy_uint32 neg_one = (npy_uint32) -1, const_zero = (npy_uint32) 0; + const npy_uint32 neg_one = (npy_uint32) -1, const_zero = 0; const int is_unsigned = neg_one > const_zero; #if PY_MAJOR_VERSION < 3 if (likely(PyInt_Check(x))) { @@ -13336,39 +12341,13 @@ static CYTHON_INLINE npy_uint32 __Pyx_PyInt_As_npy_uint32(PyObject *x) { #endif if (likely(PyLong_Check(x))) { if (is_unsigned) { -#if CYTHON_USE_PYLONG_INTERNALS - const digit* digits = ((PyLongObject*)x)->ob_digit; +#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 + #if CYTHON_USE_PYLONG_INTERNALS switch (Py_SIZE(x)) { - case 0: return (npy_uint32) 0; - case 1: __PYX_VERIFY_RETURN_INT(npy_uint32, digit, digits[0]) - case 2: - if (8 * sizeof(npy_uint32) > 1 * PyLong_SHIFT) { - if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { - __PYX_VERIFY_RETURN_INT(npy_uint32, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | digits[0]))) - } else if (8 * sizeof(npy_uint32) >= 2 * PyLong_SHIFT) { - 
return (npy_uint32) (((((npy_uint32)digits[1]) << PyLong_SHIFT) | digits[0])); - } - } - break; - case 3: - if (8 * sizeof(npy_uint32) > 2 * PyLong_SHIFT) { - if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { - __PYX_VERIFY_RETURN_INT(npy_uint32, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0]))) - } else if (8 * sizeof(npy_uint32) >= 3 * PyLong_SHIFT) { - return (npy_uint32) (((((((npy_uint32)digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0])); - } - } - break; - case 4: - if (8 * sizeof(npy_uint32) > 3 * PyLong_SHIFT) { - if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { - __PYX_VERIFY_RETURN_INT(npy_uint32, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0]))) - } else if (8 * sizeof(npy_uint32) >= 4 * PyLong_SHIFT) { - return (npy_uint32) (((((((((npy_uint32)digits[3]) << PyLong_SHIFT) | digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0])); - } - } - break; + case 0: return 0; + case 1: __PYX_VERIFY_RETURN_INT(npy_uint32, digit, ((PyLongObject*)x)->ob_digit[0]); } + #endif #endif #if CYTHON_COMPILING_IN_CPYTHON if (unlikely(Py_SIZE(x) < 0)) { @@ -13384,77 +12363,24 @@ static CYTHON_INLINE npy_uint32 __Pyx_PyInt_As_npy_uint32(PyObject *x) { } #endif if (sizeof(npy_uint32) <= sizeof(unsigned long)) { - __PYX_VERIFY_RETURN_INT_EXC(npy_uint32, unsigned long, PyLong_AsUnsignedLong(x)) + __PYX_VERIFY_RETURN_INT(npy_uint32, unsigned long, PyLong_AsUnsignedLong(x)) } else if (sizeof(npy_uint32) <= sizeof(unsigned PY_LONG_LONG)) { - __PYX_VERIFY_RETURN_INT_EXC(npy_uint32, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) + __PYX_VERIFY_RETURN_INT(npy_uint32, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) } } else { -#if CYTHON_USE_PYLONG_INTERNALS - const digit* digits = ((PyLongObject*)x)->ob_digit; +#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 + #if 
CYTHON_USE_PYLONG_INTERNALS switch (Py_SIZE(x)) { - case 0: return (npy_uint32) 0; - case -1: __PYX_VERIFY_RETURN_INT(npy_uint32, sdigit, -(sdigit) digits[0]) - case 1: __PYX_VERIFY_RETURN_INT(npy_uint32, digit, +digits[0]) - case -2: - if (8 * sizeof(npy_uint32) - 1 > 1 * PyLong_SHIFT) { - if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { - __PYX_VERIFY_RETURN_INT(npy_uint32, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | digits[0]))) - } else if (8 * sizeof(npy_uint32) - 1 > 2 * PyLong_SHIFT) { - return (npy_uint32) -(((((npy_uint32)digits[1]) << PyLong_SHIFT) | digits[0])); - } - } - break; - case 2: - if (8 * sizeof(npy_uint32) > 1 * PyLong_SHIFT) { - if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { - __PYX_VERIFY_RETURN_INT(npy_uint32, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | digits[0]))) - } else if (8 * sizeof(npy_uint32) - 1 > 2 * PyLong_SHIFT) { - return (npy_uint32) (((((npy_uint32)digits[1]) << PyLong_SHIFT) | digits[0])); - } - } - break; - case -3: - if (8 * sizeof(npy_uint32) - 1 > 2 * PyLong_SHIFT) { - if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { - __PYX_VERIFY_RETURN_INT(npy_uint32, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0]))) - } else if (8 * sizeof(npy_uint32) - 1 > 3 * PyLong_SHIFT) { - return (npy_uint32) -(((((((npy_uint32)digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0])); - } - } - break; - case 3: - if (8 * sizeof(npy_uint32) > 2 * PyLong_SHIFT) { - if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { - __PYX_VERIFY_RETURN_INT(npy_uint32, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0]))) - } else if (8 * sizeof(npy_uint32) - 1 > 3 * PyLong_SHIFT) { - return (npy_uint32) (((((((npy_uint32)digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0])); - } - } - break; - case -4: - if (8 * sizeof(npy_uint32) - 1 > 3 * PyLong_SHIFT) { - if (8 * 
sizeof(unsigned long) > 4 * PyLong_SHIFT) { - __PYX_VERIFY_RETURN_INT(npy_uint32, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0]))) - } else if (8 * sizeof(npy_uint32) - 1 > 4 * PyLong_SHIFT) { - return (npy_uint32) -(((((((((npy_uint32)digits[3]) << PyLong_SHIFT) | digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0])); - } - } - break; - case 4: - if (8 * sizeof(npy_uint32) > 3 * PyLong_SHIFT) { - if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { - __PYX_VERIFY_RETURN_INT(npy_uint32, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0]))) - } else if (8 * sizeof(npy_uint32) - 1 > 4 * PyLong_SHIFT) { - return (npy_uint32) (((((((((npy_uint32)digits[3]) << PyLong_SHIFT) | digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0])); - } - } - break; + case 0: return 0; + case 1: __PYX_VERIFY_RETURN_INT(npy_uint32, digit, +(((PyLongObject*)x)->ob_digit[0])); + case -1: __PYX_VERIFY_RETURN_INT(npy_uint32, sdigit, -(sdigit) ((PyLongObject*)x)->ob_digit[0]); } + #endif #endif if (sizeof(npy_uint32) <= sizeof(long)) { - __PYX_VERIFY_RETURN_INT_EXC(npy_uint32, long, PyLong_AsLong(x)) + __PYX_VERIFY_RETURN_INT(npy_uint32, long, PyLong_AsLong(x)) } else if (sizeof(npy_uint32) <= sizeof(PY_LONG_LONG)) { - __PYX_VERIFY_RETURN_INT_EXC(npy_uint32, PY_LONG_LONG, PyLong_AsLongLong(x)) + __PYX_VERIFY_RETURN_INT(npy_uint32, PY_LONG_LONG, PyLong_AsLongLong(x)) } } { @@ -13519,7 +12445,7 @@ static CYTHON_INLINE long __Pyx_pow_long(long b, long e) { #endif t = 1; while (likely(e)) { - t *= (b * (e&1)) | ((~e)&1); + t *= (b * (e&1)) | ((~e)&1); /* 1 or b */ b *= b; e >>= 1; } @@ -13766,34 +12692,8 @@ static CYTHON_INLINE long __Pyx_pow_long(long b, long e) { #endif #endif -static CYTHON_INLINE PyObject* __Pyx_PyInt_From_enum__NPY_TYPES(enum NPY_TYPES value) { - const enum NPY_TYPES 
neg_one = (enum NPY_TYPES) -1, const_zero = (enum NPY_TYPES) 0; - const int is_unsigned = neg_one > const_zero; - if (is_unsigned) { - if (sizeof(enum NPY_TYPES) < sizeof(long)) { - return PyInt_FromLong((long) value); - } else if (sizeof(enum NPY_TYPES) <= sizeof(unsigned long)) { - return PyLong_FromUnsignedLong((unsigned long) value); - } else if (sizeof(enum NPY_TYPES) <= sizeof(unsigned PY_LONG_LONG)) { - return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value); - } - } else { - if (sizeof(enum NPY_TYPES) <= sizeof(long)) { - return PyInt_FromLong((long) value); - } else if (sizeof(enum NPY_TYPES) <= sizeof(PY_LONG_LONG)) { - return PyLong_FromLongLong((PY_LONG_LONG) value); - } - } - { - int one = 1; int little = (int)*(unsigned char *)&one; - unsigned char *bytes = (unsigned char *)&value; - return _PyLong_FromByteArray(bytes, sizeof(enum NPY_TYPES), - little, !is_unsigned); - } -} - static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *x) { - const long neg_one = (long) -1, const_zero = (long) 0; + const long neg_one = (long) -1, const_zero = 0; const int is_unsigned = neg_one > const_zero; #if PY_MAJOR_VERSION < 3 if (likely(PyInt_Check(x))) { @@ -13810,39 +12710,13 @@ static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *x) { #endif if (likely(PyLong_Check(x))) { if (is_unsigned) { -#if CYTHON_USE_PYLONG_INTERNALS - const digit* digits = ((PyLongObject*)x)->ob_digit; +#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 + #if CYTHON_USE_PYLONG_INTERNALS switch (Py_SIZE(x)) { - case 0: return (long) 0; - case 1: __PYX_VERIFY_RETURN_INT(long, digit, digits[0]) - case 2: - if (8 * sizeof(long) > 1 * PyLong_SHIFT) { - if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { - __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | digits[0]))) - } else if (8 * sizeof(long) >= 2 * PyLong_SHIFT) { - return (long) (((((long)digits[1]) << PyLong_SHIFT) | digits[0])); - } - } - break; - case 3: - if (8 * 
sizeof(long) > 2 * PyLong_SHIFT) { - if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { - __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0]))) - } else if (8 * sizeof(long) >= 3 * PyLong_SHIFT) { - return (long) (((((((long)digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0])); - } - } - break; - case 4: - if (8 * sizeof(long) > 3 * PyLong_SHIFT) { - if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { - __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0]))) - } else if (8 * sizeof(long) >= 4 * PyLong_SHIFT) { - return (long) (((((((((long)digits[3]) << PyLong_SHIFT) | digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0])); - } - } - break; + case 0: return 0; + case 1: __PYX_VERIFY_RETURN_INT(long, digit, ((PyLongObject*)x)->ob_digit[0]); } + #endif #endif #if CYTHON_COMPILING_IN_CPYTHON if (unlikely(Py_SIZE(x) < 0)) { @@ -13858,77 +12732,24 @@ static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *x) { } #endif if (sizeof(long) <= sizeof(unsigned long)) { - __PYX_VERIFY_RETURN_INT_EXC(long, unsigned long, PyLong_AsUnsignedLong(x)) + __PYX_VERIFY_RETURN_INT(long, unsigned long, PyLong_AsUnsignedLong(x)) } else if (sizeof(long) <= sizeof(unsigned PY_LONG_LONG)) { - __PYX_VERIFY_RETURN_INT_EXC(long, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) + __PYX_VERIFY_RETURN_INT(long, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) } } else { -#if CYTHON_USE_PYLONG_INTERNALS - const digit* digits = ((PyLongObject*)x)->ob_digit; +#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 + #if CYTHON_USE_PYLONG_INTERNALS switch (Py_SIZE(x)) { - case 0: return (long) 0; - case -1: __PYX_VERIFY_RETURN_INT(long, sdigit, -(sdigit) digits[0]) - case 1: __PYX_VERIFY_RETURN_INT(long, digit, +digits[0]) - case -2: - if (8 * sizeof(long) - 1 
> 1 * PyLong_SHIFT) { - if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { - __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | digits[0]))) - } else if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) { - return (long) -(((((long)digits[1]) << PyLong_SHIFT) | digits[0])); - } - } - break; - case 2: - if (8 * sizeof(long) > 1 * PyLong_SHIFT) { - if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { - __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | digits[0]))) - } else if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) { - return (long) (((((long)digits[1]) << PyLong_SHIFT) | digits[0])); - } - } - break; - case -3: - if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) { - if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { - __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0]))) - } else if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) { - return (long) -(((((((long)digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0])); - } - } - break; - case 3: - if (8 * sizeof(long) > 2 * PyLong_SHIFT) { - if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { - __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0]))) - } else if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) { - return (long) (((((((long)digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0])); - } - } - break; - case -4: - if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) { - if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { - __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0]))) - } else if (8 * sizeof(long) - 1 > 4 * PyLong_SHIFT) { - return (long) -(((((((((long)digits[3]) << PyLong_SHIFT) | digits[2]) << PyLong_SHIFT) | digits[1]) << 
PyLong_SHIFT) | digits[0])); - } - } - break; - case 4: - if (8 * sizeof(long) > 3 * PyLong_SHIFT) { - if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { - __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0]))) - } else if (8 * sizeof(long) - 1 > 4 * PyLong_SHIFT) { - return (long) (((((((((long)digits[3]) << PyLong_SHIFT) | digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0])); - } - } - break; + case 0: return 0; + case 1: __PYX_VERIFY_RETURN_INT(long, digit, +(((PyLongObject*)x)->ob_digit[0])); + case -1: __PYX_VERIFY_RETURN_INT(long, sdigit, -(sdigit) ((PyLongObject*)x)->ob_digit[0]); } + #endif #endif if (sizeof(long) <= sizeof(long)) { - __PYX_VERIFY_RETURN_INT_EXC(long, long, PyLong_AsLong(x)) + __PYX_VERIFY_RETURN_INT(long, long, PyLong_AsLong(x)) } else if (sizeof(long) <= sizeof(PY_LONG_LONG)) { - __PYX_VERIFY_RETURN_INT_EXC(long, PY_LONG_LONG, PyLong_AsLongLong(x)) + __PYX_VERIFY_RETURN_INT(long, PY_LONG_LONG, PyLong_AsLongLong(x)) } } { @@ -14176,7 +12997,7 @@ static CYTHON_INLINE char* __Pyx_PyObject_AsString(PyObject* o) { return __Pyx_PyObject_AsStringAndSize(o, &ignore); } static CYTHON_INLINE char* __Pyx_PyObject_AsStringAndSize(PyObject* o, Py_ssize_t *length) { -#if CYTHON_COMPILING_IN_CPYTHON && (__PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT) +#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT if ( #if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII __Pyx_sys_getdefaultencoding_not_ascii && @@ -14217,7 +13038,7 @@ static CYTHON_INLINE char* __Pyx_PyObject_AsStringAndSize(PyObject* o, Py_ssize_ #endif } else #endif -#if (!CYTHON_COMPILING_IN_PYPY) || (defined(PyByteArray_AS_STRING) && defined(PyByteArray_GET_SIZE)) +#if !CYTHON_COMPILING_IN_PYPY if (PyByteArray_Check(o)) { *length = PyByteArray_GET_SIZE(o); return 
PyByteArray_AS_STRING(o); @@ -14247,7 +13068,7 @@ static CYTHON_INLINE PyObject* __Pyx_PyNumber_Int(PyObject* x) { #else if (PyLong_Check(x)) #endif - return __Pyx_NewRef(x); + return Py_INCREF(x), x; m = Py_TYPE(x)->tp_as_number; #if PY_MAJOR_VERSION < 3 if (m && m->nb_int) { @@ -14287,55 +13108,18 @@ static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) { Py_ssize_t ival; PyObject *x; #if PY_MAJOR_VERSION < 3 - if (likely(PyInt_CheckExact(b))) { - if (sizeof(Py_ssize_t) >= sizeof(long)) - return PyInt_AS_LONG(b); - else - return PyInt_AsSsize_t(x); - } + if (likely(PyInt_CheckExact(b))) + return PyInt_AS_LONG(b); #endif if (likely(PyLong_CheckExact(b))) { - #if CYTHON_USE_PYLONG_INTERNALS - const digit* digits = ((PyLongObject*)b)->ob_digit; - const Py_ssize_t size = Py_SIZE(b); - if (likely(__Pyx_sst_abs(size) <= 1)) { - ival = likely(size) ? digits[0] : 0; - if (size == -1) ival = -ival; - return ival; - } else { - switch (size) { - case 2: - if (8 * sizeof(Py_ssize_t) > 2 * PyLong_SHIFT) { - return (Py_ssize_t) (((((size_t)digits[1]) << PyLong_SHIFT) | digits[0])); - } - break; - case -2: - if (8 * sizeof(Py_ssize_t) > 2 * PyLong_SHIFT) { - return -(Py_ssize_t) (((((size_t)digits[1]) << PyLong_SHIFT) | digits[0])); - } - break; - case 3: - if (8 * sizeof(Py_ssize_t) > 3 * PyLong_SHIFT) { - return (Py_ssize_t) (((((((size_t)digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0])); - } - break; - case -3: - if (8 * sizeof(Py_ssize_t) > 3 * PyLong_SHIFT) { - return -(Py_ssize_t) (((((((size_t)digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0])); - } - break; - case 4: - if (8 * sizeof(Py_ssize_t) > 4 * PyLong_SHIFT) { - return (Py_ssize_t) (((((((((size_t)digits[3]) << PyLong_SHIFT) | digits[2]) << PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0])); - } - break; - case -4: - if (8 * sizeof(Py_ssize_t) > 4 * PyLong_SHIFT) { - return -(Py_ssize_t) (((((((((size_t)digits[3]) << PyLong_SHIFT) | digits[2]) << 
PyLong_SHIFT) | digits[1]) << PyLong_SHIFT) | digits[0])); - } - break; - } - } + #if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 + #if CYTHON_USE_PYLONG_INTERNALS + switch (Py_SIZE(b)) { + case -1: return -(sdigit)((PyLongObject*)b)->ob_digit[0]; + case 0: return 0; + case 1: return ((PyLongObject*)b)->ob_digit[0]; + } + #endif #endif return PyLong_AsSsize_t(b); } diff --git a/gensim/models/word2vec_inner.pyx b/gensim/models/word2vec_inner.pyx index 2946ae3935..632e15d933 100755 --- a/gensim/models/word2vec_inner.pyx +++ b/gensim/models/word2vec_inner.pyx @@ -24,7 +24,7 @@ except ImportError: REAL = np.float32 -DEF MAX_SENTENCE_LEN = 10000 +DEF MAX_SENTENCE_LEN = 100000 DEF MAX_NUM_SENTENCES = 1000 cdef scopy_ptr scopy=PyCObject_AsVoidPtr(fblas.scopy._cpointer) # y = x @@ -561,7 +561,7 @@ def score_sentence_sg(model, sentence, _work): for token in sentence: word = vlookup[token] if token in vlookup else None if word is None: - continue # should drop the + continue # should drop the indexes[i] = word.index codelens[i] = len(word.code) codes[i] = np.PyArray_DATA(word.code) @@ -579,10 +579,10 @@ def score_sentence_sg(model, sentence, _work): for i in range(sentence_len): if codelens[i] == 0: continue - j = i - window + j = i - window if j < 0: j = 0 - k = i + window + 1 + k = i + window + 1 if k > sentence_len: k = sentence_len for j in range(j, k): @@ -661,7 +661,7 @@ def score_sentence_cbow(model, sentence, _work, _neu1): for i in range(sentence_len): if codelens[i] == 0: continue - j = i - window + j = i - window if j < 0: j = 0 k = i + window + 1 @@ -738,7 +738,7 @@ def init(): return 0 # double elif (abs(p_res[0] - expected) < 0.0001): our_dot = our_dot_float - our_saxpy = saxpy + our_saxpy = saxpy return 1 # float else: # neither => use cython loops, no BLAS @@ -748,3 +748,5 @@ def init(): return 2 FAST_VERSION = init() # initialize the module +MAX_WORDS_IN_BATCH = MAX_SENTENCE_LEN +MAX_BATCH_SENTENCES = MAX_NUM_SENTENCES diff --git 
a/gensim/models/wrappers/ldamallet.py b/gensim/models/wrappers/ldamallet.py index 5acb4d8f74..7072752aca 100644 --- a/gensim/models/wrappers/ldamallet.py +++ b/gensim/models/wrappers/ldamallet.py @@ -39,7 +39,7 @@ from gensim import utils, matutils -logger = logging.getLogger('gensim.models.wrappers.ldamallet') +logger = logging.getLogger(__name__) def read_doctopics(fname, eps=1e-6): diff --git a/gensim/summarization/keywords.py b/gensim/summarization/keywords.py index 5146c560ad..b66f096263 100644 --- a/gensim/summarization/keywords.py +++ b/gensim/summarization/keywords.py @@ -8,16 +8,20 @@ from gensim.summarization.textcleaner import tokenize_by_word as _tokenize_by_word from gensim.summarization.commons import build_graph as _build_graph from gensim.summarization.commons import remove_unreachable_nodes as _remove_unreachable_nodes +from gensim.utils import to_unicode from itertools import combinations as _combinations from six.moves.queue import Queue as _Queue from six.moves import xrange +from six import iteritems WINDOW_SIZE = 2 -"""Check tags in http://www.clips.ua.ac.be/pages/mbsp-tags and use only first two letters +""" +Check tags in http://www.clips.ua.ac.be/pages/mbsp-tags and use only first two letters Example: filter for nouns and adjectives: -INCLUDING_FILTER = ['NN', 'JJ']""" +INCLUDING_FILTER = ['NN', 'JJ'] +""" INCLUDING_FILTER = ['NN', 'JJ'] EXCLUDING_FILTER = [] @@ -26,13 +30,17 @@ def _get_pos_filters(): return frozenset(INCLUDING_FILTER), frozenset(EXCLUDING_FILTER) -def _get_words_for_graph(tokens): - include_filters, exclude_filters = _get_pos_filters() +def _get_words_for_graph(tokens, pos_filter): + if pos_filter is None: + include_filters, exclude_filters = _get_pos_filters() + else: + include_filters = set(pos_filter) + exclude_filters = frozenset([]) if include_filters and exclude_filters: raise ValueError("Can't use both include and exclude filters, should use only one") result = [] - for word, unit in tokens.iteritems(): + for word, 
unit in iteritems(tokens): if exclude_filters and unit.tag in exclude_filters: continue if (include_filters and unit.tag in include_filters) or not include_filters or not unit.tag: @@ -111,7 +119,7 @@ def _extract_tokens(lemmas, scores, ratio, words): def _lemmas_to_words(tokens): lemma_to_word = {} - for word, unit in tokens.iteritems(): + for word, unit in iteritems(tokens): lemma = unit.token if lemma in lemma_to_word: lemma_to_word[lemma].append(word) @@ -156,7 +164,7 @@ def _get_combined_keywords(_keywords, split_text): result.append(word) # appends last word if keyword and doesn't iterate for j in xrange(i + 1, len_text): other_word = _strip_word(split_text[j]) - if other_word in _keywords and other_word == split_text[j].decode("utf-8"): + if other_word in _keywords and other_word == split_text[j]: combined_word.append(other_word) else: for keyword in combined_word: @@ -189,13 +197,14 @@ def _format_results(_keywords, combined_keywords, split, scores): return "\n".join(combined_keywords) -def keywords(text, ratio=0.2, words=None, split=False, scores=False): +def keywords(text, ratio=0.2, words=None, split=False, scores=False, pos_filter=['NN', 'JJ'], lemmatize=False): # Gets a dict of word -> lemma + text = to_unicode(text) tokens = _clean_text_by_word(text) split_text = list(_tokenize_by_word(text)) # Creates the graph and adds the edges - graph = _build_graph(_get_words_for_graph(tokens)) + graph = _build_graph(_get_words_for_graph(tokens, pos_filter)) _set_graph_edges(graph, tokens, split_text) del split_text # It's no longer used @@ -206,7 +215,14 @@ def keywords(text, ratio=0.2, words=None, split=False, scores=False): extracted_lemmas = _extract_tokens(graph.nodes(), pagerank_scores, ratio, words) - lemmas_to_word = _lemmas_to_words(tokens) + # The results can be polluted by many variations of the same word + if lemmatize: + lemmas_to_word = {} + for word, unit in iteritems(tokens): + lemmas_to_word[unit.token] = [word] + else: + lemmas_to_word = 
_lemmas_to_words(tokens) + keywords = _get_keywords_with_score(extracted_lemmas, lemmas_to_word) # text.split() to keep numbers and punctuation marks, so separeted concepts are not combined diff --git a/gensim/summarization/pagerank_weighted.py b/gensim/summarization/pagerank_weighted.py index 20fa4df6cf..1978c6e1c7 100644 --- a/gensim/summarization/pagerank_weighted.py +++ b/gensim/summarization/pagerank_weighted.py @@ -2,10 +2,9 @@ # -*- coding: utf-8 -*- # # Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html - from numpy import empty as empty_matrix from scipy.sparse import csr_matrix -from scipy.linalg import eig +from scipy.sparse.linalg import eigs from six.moves import xrange try: @@ -21,8 +20,10 @@ def pagerank_weighted(graph, damping=0.85): probability_matrix = build_probability_matrix(graph) pagerank_matrix = damping * adjacency_matrix.todense() + (1 - damping) * probability_matrix - vals, vecs = eig(pagerank_matrix, left=True, right=False) - return process_results(graph, vecs) + + vals, vecs = eigs(pagerank_matrix.T, k=1) # TODO raise an error if matrix has complex eigenvectors? 
+ + return process_results(graph, vecs.real) def build_adjacency_matrix(graph): @@ -37,7 +38,7 @@ def build_adjacency_matrix(graph): neighbors_sum = sum(graph.edge_weight((current_node, neighbor)) for neighbor in graph.neighbors(current_node)) for j in xrange(length): edge_weight = float(graph.edge_weight((current_node, nodes[j]))) - if i != j and edge_weight != 0: + if i != j and edge_weight != 0.0: row.append(i) col.append(j) data.append(edge_weight / neighbors_sum) @@ -49,7 +50,7 @@ def build_probability_matrix(graph): dimension = len(graph.nodes()) matrix = empty_matrix((dimension, dimension)) - probability = 1 / float(dimension) + probability = 1.0 / float(dimension) matrix.fill(probability) return matrix @@ -58,6 +59,6 @@ def build_probability_matrix(graph): def process_results(graph, vecs): scores = {} for i, node in enumerate(graph.nodes()): - scores[node] = abs(vecs[i][0]) + scores[node] = abs(vecs[i, :]) return scores diff --git a/gensim/summarization/summarizer.py b/gensim/summarization/summarizer.py index 9e5e343299..2d46716b4a 100644 --- a/gensim/summarization/summarizer.py +++ b/gensim/summarization/summarizer.py @@ -10,13 +10,14 @@ from gensim.summarization.commons import remove_unreachable_nodes as _remove_unreachable_nodes from gensim.summarization.bm25 import get_bm25_weights as _bm25_weights from gensim.corpora import Dictionary -from scipy.sparse import csr_matrix from math import log10 as _log10 from six.moves import xrange INPUT_MIN_LENGTH = 10 +WEIGHT_THRESHOLD = 1.e-3 + logger = logging.getLogger(__name__) @@ -26,7 +27,7 @@ def _set_graph_edge_weights(graph): for i in xrange(len(documents)): for j in xrange(len(documents)): - if i == j: + if i == j or weights[i][j] < WEIGHT_THRESHOLD: continue sentence_1 = documents[i] diff --git a/gensim/test/test_data/mihalcea_tarau.kw.txt b/gensim/test/test_data/mihalcea_tarau.kw.txt new file mode 100644 index 0000000000..b8ea0cabc3 --- /dev/null +++ b/gensim/test/test_data/mihalcea_tarau.kw.txt @@ -0,0 
+1,21 @@ +gilbert +hurricane +winds +coast +storm +saturday +flood +flooding +weather +alert +defense alerted +strong +people +pushed +puerto +cabral said +north +associated +south +domingo +residents diff --git a/gensim/test/test_data/mihalcea_tarau.kwpos.txt b/gensim/test/test_data/mihalcea_tarau.kwpos.txt new file mode 100644 index 0000000000..7e14dfaae3 --- /dev/null +++ b/gensim/test/test_data/mihalcea_tarau.kwpos.txt @@ -0,0 +1,30 @@ +gilbert +hurricane +coast +storm +saturday +winds heavy +flood +flooding +weather +alert +defense alerted +strong +pushed +people +puerto +cabral said +north +associated +south +domingo +residents +dominican +miles +southeast +san +civil +home +reached +juan +named diff --git a/gensim/test/test_hdpmodel.py b/gensim/test/test_hdpmodel.py new file mode 100644 index 0000000000..9f6d06aaf3 --- /dev/null +++ b/gensim/test/test_hdpmodel.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (C) 2010 Radim Rehurek +# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html + +""" +Automated tests for checking transformation algorithms (the models package). 
+""" + + +import logging +import unittest +import os +import os.path +import tempfile + +import six +import numpy +import scipy.linalg + +from gensim.corpora import mmcorpus, Dictionary +from gensim.models import hdpmodel +from gensim import matutils + + +module_path = os.path.dirname(__file__) # needed because sample data files are located in the same folder +datapath = lambda fname: os.path.join(module_path, 'test_data', fname) + + +# set up vars used in testing ("Deerwester" from the web tutorial) +texts = [['human', 'interface', 'computer'], + ['survey', 'user', 'computer', 'system', 'response', 'time'], + ['eps', 'user', 'interface', 'system'], + ['system', 'human', 'system', 'eps'], + ['user', 'response', 'time'], + ['trees'], + ['graph', 'trees'], + ['graph', 'minors', 'trees'], + ['graph', 'minors', 'survey']] +dictionary = Dictionary(texts) +corpus = [dictionary.doc2bow(text) for text in texts] + + +def testfile(): + # temporary data will be stored to this file + return os.path.join(tempfile.gettempdir(), 'gensim_models.tst') + + + +class TestHdpModel(unittest.TestCase): + def setUp(self): + self.corpus = mmcorpus.MmCorpus(datapath('testcorpus.mm')) + self.class_ = hdpmodel.HdpModel + self.model = self.class_(corpus, id2word=dictionary) + + def testShowTopics(self): + topics = self.model.show_topics(formatted=False) + + for topic_no, topic in topics: + self.assertTrue(isinstance(topic_no, int)) + self.assertTrue(isinstance(topic, list)) + for k, v in topic: + self.assertTrue(isinstance(k, six.string_types)) + self.assertTrue(isinstance(v, float)) + + + +if __name__ == '__main__': + logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.DEBUG) + unittest.main() diff --git a/gensim/test/test_keywords.py b/gensim/test/test_keywords.py new file mode 100644 index 0000000000..952ba2fadd --- /dev/null +++ b/gensim/test/test_keywords.py @@ -0,0 +1,97 @@ +#!/usr/bin/env python +# encoding: utf-8 +# +# Licensed under the GNU LGPL v2.1 
- http://www.gnu.org/licenses/lgpl.html + +""" +Automated test to reproduce the results of Mihalcea and Tarau (2004). + +Mihalcea and Tarau (2004) introduces the TextRank summarization algorithm. +As a validation of the gensim implementation we reproduced its results +in this test. + +""" + +import os.path +import logging +import unittest + +from gensim import utils +from gensim.corpora import Dictionary +from gensim.summarization import keywords + + +class TestKeywordsTest(unittest.TestCase): + + def test_text_keywords(self): + pre_path = os.path.join(os.path.dirname(__file__), 'test_data') + + with utils.smart_open(os.path.join(pre_path, "mihalcea_tarau.txt"), mode="r") as f: + text = f.read() + + # calculate keywords + generated_keywords = keywords(text, split=True) + + # To be compared to the reference. + with utils.smart_open(os.path.join(pre_path, "mihalcea_tarau.kw.txt"), mode="r") as f: + kw = f.read().strip().split("\n") + + self.assertEqual(set(map(str, generated_keywords)), set(map(str, kw))) + + def test_text_keywords_words(self): + pre_path = os.path.join(os.path.dirname(__file__), 'test_data') + + with utils.smart_open(os.path.join(pre_path, "mihalcea_tarau.txt"), mode="r") as f: + text = f.read() + + # calculate exactly 13 keywords + generated_keywords = keywords(text, words=15, split=True) + + self.assertEqual(len(generated_keywords), 16) + + def test_text_keywords_pos(self): + pre_path = os.path.join(os.path.dirname(__file__), 'test_data') + + with utils.smart_open(os.path.join(pre_path, "mihalcea_tarau.txt"), mode="r") as f: + text = f.read() + + # calculate keywords using only certain parts of speech + generated_keywords_NNVBJJ = keywords(text, pos_filter=['NN', 'VB', 'JJ'], ratio=0.3, split=True) + + # To be compared to the reference. 
+ with utils.smart_open(os.path.join(pre_path, "mihalcea_tarau.kwpos.txt"), mode="r") as f: + kw = f.read().strip().split("\n") + + self.assertEqual(set(map(str, generated_keywords_NNVBJJ)), set(map(str, kw))) + + def test_text_summarization_raises_exception_on_short_input_text(self): + pre_path = os.path.join(os.path.dirname(__file__), 'test_data') + + with utils.smart_open(os.path.join(pre_path, "testsummarization_unrelated.txt"), mode="r") as f: + text = f.read() + + # Keeps the first 8 sentences to make the text shorter. + text = "\n".join(text.split('\n')[:8]) + + self.assertTrue(keywords(text) is not None) + + + def test_keywords_ratio(self): + pre_path = os.path.join(os.path.dirname(__file__), 'test_data') + + with utils.smart_open(os.path.join(pre_path, "mihalcea_tarau.txt"), mode="r") as f: + text = f.read() + + # Check ratio parameter is well behaved. Because length is taken on tokenized clean text + # we just check that ratio 20% is twice as long as ratio 10% + # Values of 10% and 20% were carefully selected for this test to avoid + # numerical instabilities when several keywords have almost the same score + selected_docs_12 = keywords(text, ratio=0.1, split=True) + selected_docs_21 = keywords(text, ratio=0.2, split=True) + + self.assertAlmostEqual(float(len(selected_docs_21))/len(selected_docs_12), float(21)/12, places=1) + + +if __name__ == '__main__': + logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.DEBUG) + unittest.main() diff --git a/gensim/test/test_ldamallet_wrapper.py b/gensim/test/test_ldamallet_wrapper.py new file mode 100644 index 0000000000..ab7af8dce8 --- /dev/null +++ b/gensim/test/test_ldamallet_wrapper.py @@ -0,0 +1,134 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (C) 2010 Radim Rehurek +# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html + +""" +Automated tests for checking transformation algorithms (the models package). 
+""" + + +import logging +import unittest +import os +import os.path +import tempfile + +import six +import numpy +import scipy.linalg + +from gensim.corpora import mmcorpus, Dictionary +from gensim.models.wrappers import ldamallet +from gensim import matutils + + +module_path = os.path.dirname(__file__) # needed because sample data files are located in the same folder +datapath = lambda fname: os.path.join(module_path, 'test_data', fname) + + +# set up vars used in testing ("Deerwester" from the web tutorial) +texts = [['human', 'interface', 'computer'], + ['survey', 'user', 'computer', 'system', 'response', 'time'], + ['eps', 'user', 'interface', 'system'], + ['system', 'human', 'system', 'eps'], + ['user', 'response', 'time'], + ['trees'], + ['graph', 'trees'], + ['graph', 'minors', 'trees'], + ['graph', 'minors', 'survey']] +dictionary = Dictionary(texts) +corpus = [dictionary.doc2bow(text) for text in texts] + + +def testfile(): + # temporary data will be stored to this file + return os.path.join(tempfile.gettempdir(), 'gensim_models.tst') + + +class TestLdaMallet(unittest.TestCase): + def setUp(self): + self.corpus = mmcorpus.MmCorpus(datapath('testcorpus.mm')) + mallet_home = os.environ.get('MALLET_HOME', None) + self.mallet_path = os.path.join(mallet_home, 'bin', 'mallet') if mallet_home else None + + def testTransform(self): + if not self.mallet_path: + return + passed = False + for i in range(5): # restart at most 5 times + # create the transformation model + model = ldamallet.LdaMallet(self.mallet_path, corpus, id2word=dictionary, num_topics=2, iterations=200) + + # transform one document + doc = list(corpus)[0] + transformed = model[doc] + + vec = matutils.sparse2full(transformed, 2) # convert to dense vector, for easier equality tests + expected = [0.49, 0.51] + passed = numpy.allclose(sorted(vec), sorted(expected), atol=1e-2) # must contain the same values, up to re-ordering + if passed: + break + logging.warning("LDA failed to converge on attempt %i 
(got %s, expected %s)" % + (i, sorted(vec), sorted(expected))) + self.assertTrue(passed) + + + def testPersistence(self): + if not self.mallet_path: + return + fname = testfile() + model = ldamallet.LdaMallet(self.mallet_path, self.corpus, num_topics=2, iterations=100) + model.save(fname) + model2 = ldamallet.LdaMallet.load(fname) + self.assertEqual(model.num_topics, model2.num_topics) + self.assertTrue(numpy.allclose(model.wordtopics, model2.wordtopics)) + tstvec = [] + self.assertTrue(numpy.allclose(model[tstvec], model2[tstvec])) # try projecting an empty vector + + def testPersistenceCompressed(self): + if not self.mallet_path: + return + fname = testfile() + '.gz' + model = ldamallet.LdaMallet(self.mallet_path, self.corpus, num_topics=2, iterations=100) + model.save(fname) + model2 = ldamallet.LdaMallet.load(fname, mmap=None) + self.assertEqual(model.num_topics, model2.num_topics) + self.assertTrue(numpy.allclose(model.wordtopics, model2.wordtopics)) + tstvec = [] + self.assertTrue(numpy.allclose(model[tstvec], model2[tstvec])) # try projecting an empty vector + + def testLargeMmap(self): + if not self.mallet_path: + return + fname = testfile() + model = ldamallet.LdaMallet(self.mallet_path, self.corpus, num_topics=2, iterations=100) + + # simulate storing large arrays separately + model.save(testfile(), sep_limit=0) + + # test loading the large model arrays with mmap + model2 = ldamodel.LdaModel.load(testfile(), mmap='r') + self.assertEqual(model.num_topics, model2.num_topics) + self.assertTrue(isinstance(model2.wordtopics, numpy.memmap)) + self.assertTrue(numpy.allclose(model.wordtopics, model2.wordtopics)) + tstvec = [] + self.assertTrue(numpy.allclose(model[tstvec], model2[tstvec])) # try projecting an empty vector + + def testLargeMmapCompressed(self): + if not self.mallet_path: + return + fname = testfile() + '.gz' + model = ldamallet.LdaMallet(self.mallet_path, self.corpus, num_topics=2, iterations=100) + + # simulate storing large arrays separately + 
model.save(fname, sep_limit=0) + + # test loading the large model arrays with mmap + self.assertRaises(IOError, ldamodel.LdaModel.load, fname, mmap='r') +#endclass TestLdaMallet + +if __name__ == '__main__': + logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.DEBUG) + unittest.main() diff --git a/gensim/test/test_ldamodel.py b/gensim/test/test_ldamodel.py new file mode 100644 index 0000000000..f7a2eaf820 --- /dev/null +++ b/gensim/test/test_ldamodel.py @@ -0,0 +1,262 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (C) 2010 Radim Rehurek +# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html + +""" +Automated tests for checking transformation algorithms (the models package). +""" + + +import logging +import unittest +import os +import os.path +import tempfile + +import six +import numpy +import scipy.linalg + +from gensim.corpora import mmcorpus, Dictionary +from gensim.models import ldamodel, ldamulticore +from gensim import matutils + + +module_path = os.path.dirname(__file__) # needed because sample data files are located in the same folder +datapath = lambda fname: os.path.join(module_path, 'test_data', fname) + + +# set up vars used in testing ("Deerwester" from the web tutorial) +texts = [['human', 'interface', 'computer'], + ['survey', 'user', 'computer', 'system', 'response', 'time'], + ['eps', 'user', 'interface', 'system'], + ['system', 'human', 'system', 'eps'], + ['user', 'response', 'time'], + ['trees'], + ['graph', 'trees'], + ['graph', 'minors', 'trees'], + ['graph', 'minors', 'survey']] +dictionary = Dictionary(texts) +corpus = [dictionary.doc2bow(text) for text in texts] + + +def testfile(): + # temporary data will be stored to this file + return os.path.join(tempfile.gettempdir(), 'gensim_models.tst') + + + +class TestLdaModel(unittest.TestCase): + def setUp(self): + self.corpus = mmcorpus.MmCorpus(datapath('testcorpus.mm')) + self.class_ = ldamodel.LdaModel + self.model = 
self.class_(corpus, id2word=dictionary, num_topics=2, passes=100) + + def testTransform(self): + passed = False + # sometimes, LDA training gets stuck at a local minimum + # in that case try re-training the model from scratch, hoping for a + # better random initialization + for i in range(5): # restart at most 5 times + # create the transformation model + model = self.class_(id2word=dictionary, num_topics=2, passes=100) + model.update(self.corpus) + + # transform one document + doc = list(corpus)[0] + transformed = model[doc] + + vec = matutils.sparse2full(transformed, 2) # convert to dense vector, for easier equality tests + expected = [0.13, 0.87] + passed = numpy.allclose(sorted(vec), sorted(expected), atol=1e-2) # must contain the same values, up to re-ordering + if passed: + break + logging.warning("LDA failed to converge on attempt %i (got %s, expected %s)" % + (i, sorted(vec), sorted(expected))) + self.assertTrue(passed) + + def testTopTopics(self): + top_topics = self.model.top_topics(self.corpus) + + for topic, score in top_topics: + self.assertTrue(isinstance(topic, list)) + self.assertTrue(isinstance(score, float)) + + for v, k in topic: + self.assertTrue(isinstance(k, six.string_types)) + self.assertTrue(isinstance(v, float)) + + def testGetTopicTerms(self): + topic_terms = self.model.get_topic_terms(1) + + for k, v in topic_terms: + self.assertTrue(isinstance(k, int) or isinstance(k, numpy.int64)) + self.assertTrue(isinstance(v, float)) + + def testShowTopic(self): + topic = self.model.show_topic(1) + + for k, v in topic: + self.assertTrue(isinstance(k, six.string_types)) + self.assertTrue(isinstance(v, float)) + + def testShowTopics(self): + topics = self.model.show_topics(formatted=False) + + for topic_no, topic in topics: + self.assertTrue(isinstance(topic_no, int)) + self.assertTrue(isinstance(topic, list)) + for k, v in topic: + self.assertTrue(isinstance(k, six.string_types)) + self.assertTrue(isinstance(v, float)) + + + def 
testGetDocumentTopics(self): + doc_topics = self.model.get_document_topics(self.corpus) + + for topic in doc_topics: + self.assertTrue(isinstance(topic, list)) + for k, v in topic: + self.assertTrue(isinstance(k, int)) + self.assertTrue(isinstance(v, float)) + + doc_topics = self.model.get_document_topics(self.corpus[0]) + + for k, v in doc_topics: + self.assertTrue(isinstance(k, int)) + self.assertTrue(isinstance(v, float)) + + + def testPasses(self): + # long message includes the original error message with a custom one + self.longMessage = True + + # construct what we expect when passes aren't involved + test_rhots = list() + model = self.class_(id2word=dictionary, chunksize=1, num_topics=2) + final_rhot = lambda: pow(model.offset + (1 * model.num_updates) / model.chunksize, -model.decay) + + # generate 5 updates to test rhot on + for x in range(5): + model.update(self.corpus) + test_rhots.append(final_rhot()) + + for passes in [1, 5, 10, 50, 100]: + model = self.class_(id2word=dictionary, chunksize=1, num_topics=2, passes=passes) + self.assertEqual(final_rhot(), 1.0) + # make sure the rhot matches the test after each update + for test_rhot in test_rhots: + model.update(self.corpus) + + msg = ", ".join(map(str, [passes, model.num_updates, model.state.numdocs])) + self.assertAlmostEqual(final_rhot(), test_rhot, msg=msg) + + self.assertEqual(model.state.numdocs, len(corpus) * len(test_rhots)) + self.assertEqual(model.num_updates, len(corpus) * len(test_rhots)) + + # def testTopicSeeding(self): + # for topic in range(2): + # passed = False + # for i in range(5): # restart at most this many times, to mitigate LDA randomness + # # try seeding it both ways round, check you get the same + # # topics out but with which way round they are depending + # # on the way round they're seeded + # eta = numpy.ones((2, len(dictionary))) * 0.5 + # system = dictionary.token2id[u'system'] + # trees = dictionary.token2id[u'trees'] + + # # aggressively seed the word 'system', in one 
of the + # # two topics, 10 times higher than the other words + # eta[topic, system] *= 10.0 + + # model = self.class_(id2word=dictionary, num_topics=2, passes=200, eta=eta) + # model.update(self.corpus) + + # topics = [dict((word, p) for p, word in model.show_topic(j, topn=None)) for j in range(2)] + + # # check that the word 'system' in the topic we seeded got a high weight, + # # and the word 'trees' (the main word in the other topic) a low weight -- + # # and vice versa for the other topic (which we didn't seed with 'system') + # passed = ( + # (topics[topic][u'system'] > topics[topic][u'trees']) + # and + # (topics[1 - topic][u'system'] < topics[1 - topic][u'trees']) + # ) + # if passed: + # break + # logging.warning("LDA failed to converge on attempt %i (got %s)", i, topics) + # self.assertTrue(passed) + + def testPersistence(self): + fname = testfile() + model = self.model + model.save(fname) + model2 = self.class_.load(fname) + self.assertEqual(model.num_topics, model2.num_topics) + self.assertTrue(numpy.allclose(model.expElogbeta, model2.expElogbeta)) + tstvec = [] + self.assertTrue(numpy.allclose(model[tstvec], model2[tstvec])) # try projecting an empty vector + + def testPersistenceIgnore(self): + fname = testfile() + model = ldamodel.LdaModel(self.corpus, num_topics=2) + model.save(fname, ignore='id2word') + model2 = ldamodel.LdaModel.load(fname) + self.assertTrue(model2.id2word is None) + + model.save(fname, ignore=['id2word']) + model2 = ldamodel.LdaModel.load(fname) + self.assertTrue(model2.id2word is None) + + def testPersistenceCompressed(self): + fname = testfile() + '.gz' + model = self.model + model.save(fname) + model2 = self.class_.load(fname, mmap=None) + self.assertEqual(model.num_topics, model2.num_topics) + self.assertTrue(numpy.allclose(model.expElogbeta, model2.expElogbeta)) + tstvec = [] + self.assertTrue(numpy.allclose(model[tstvec], model2[tstvec])) # try projecting an empty vector + + def testLargeMmap(self): + fname = testfile() + 
model = self.model + + # simulate storing large arrays separately + model.save(testfile(), sep_limit=0) + + # test loading the large model arrays with mmap + model2 = self.class_.load(testfile(), mmap='r') + self.assertEqual(model.num_topics, model2.num_topics) + self.assertTrue(isinstance(model2.expElogbeta, numpy.memmap)) + self.assertTrue(numpy.allclose(model.expElogbeta, model2.expElogbeta)) + tstvec = [] + self.assertTrue(numpy.allclose(model[tstvec], model2[tstvec])) # try projecting an empty vector + + def testLargeMmapCompressed(self): + fname = testfile() + '.gz' + model = self.model + + # simulate storing large arrays separately + model.save(fname, sep_limit=0) + + # test loading the large model arrays with mmap + self.assertRaises(IOError, self.class_.load, fname, mmap='r') + +#endclass TestLdaModel + + +class TestLdaMulticore(TestLdaModel): + def setUp(self): + self.corpus = mmcorpus.MmCorpus(datapath('testcorpus.mm')) + self.class_ = ldamulticore.LdaMulticore + self.model = self.class_(corpus, id2word=dictionary, num_topics=2, passes=100) + +#endclass TestLdaMulticore + + +if __name__ == '__main__': + logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.DEBUG) + unittest.main() diff --git a/gensim/test/test_logentropy_model.py b/gensim/test/test_logentropy_model.py new file mode 100644 index 0000000000..c0f143da81 --- /dev/null +++ b/gensim/test/test_logentropy_model.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (C) 2010 Radim Rehurek +# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html + +""" +Automated tests for checking transformation algorithms (the models package). 
+""" + + +import logging +import unittest +import os +import os.path +import tempfile + +import six +import numpy +import scipy.linalg + +from gensim.corpora import mmcorpus, Dictionary +from gensim.models import logentropy_model +from gensim import matutils + +module_path = os.path.dirname(__file__) # needed because sample data files are located in the same folder +datapath = lambda fname: os.path.join(module_path, 'test_data', fname) + + +# set up vars used in testing ("Deerwester" from the web tutorial) +texts = [['human', 'interface', 'computer'], + ['survey', 'user', 'computer', 'system', 'response', 'time'], + ['eps', 'user', 'interface', 'system'], + ['system', 'human', 'system', 'eps'], + ['user', 'response', 'time'], + ['trees'], + ['graph', 'trees'], + ['graph', 'minors', 'trees'], + ['graph', 'minors', 'survey']] +dictionary = Dictionary(texts) +corpus = [dictionary.doc2bow(text) for text in texts] + + +def testfile(): + # temporary data will be stored to this file + return os.path.join(tempfile.gettempdir(), 'gensim_models.tst') + + +class TestLogEntropyModel(unittest.TestCase): + def setUp(self): + self.corpus_small = mmcorpus.MmCorpus(datapath('test_corpus_small.mm')) + self.corpus_ok = mmcorpus.MmCorpus(datapath('test_corpus_ok.mm')) + + + def testTransform(self): + # create the transformation model + model = logentropy_model.LogEntropyModel(self.corpus_ok, normalize=False) + + # transform one document + doc = list(self.corpus_ok)[0] + transformed = model[doc] + + expected = [(0, 0.3748900964125389), + (1, 0.30730215324230725), + (3, 1.20941755462856)] + self.assertTrue(numpy.allclose(transformed, expected)) + + + def testPersistence(self): + fname = testfile() + model = logentropy_model.LogEntropyModel(self.corpus_ok, normalize=True) + model.save(fname) + model2 = logentropy_model.LogEntropyModel.load(fname) + self.assertTrue(model.entr == model2.entr) + tstvec = [] + self.assertTrue(numpy.allclose(model[tstvec], model2[tstvec])) + + def 
testPersistenceCompressed(self): + fname = testfile() + '.gz' + model = logentropy_model.LogEntropyModel(self.corpus_ok, normalize=True) + model.save(fname) + model2 = logentropy_model.LogEntropyModel.load(fname, mmap=None) + self.assertTrue(model.entr == model2.entr) + tstvec = [] + self.assertTrue(numpy.allclose(model[tstvec], model2[tstvec])) +#endclass TestLogEntropyModel + + +if __name__ == '__main__': + logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.DEBUG) + unittest.main() diff --git a/gensim/test/test_lsimodel.py b/gensim/test/test_lsimodel.py new file mode 100644 index 0000000000..457725ebbb --- /dev/null +++ b/gensim/test/test_lsimodel.py @@ -0,0 +1,202 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (C) 2010 Radim Rehurek +# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html + +""" +Automated tests for checking transformation algorithms (the models package). +""" + + +import logging +import unittest +import os +import os.path +import tempfile + +import six +import numpy +import scipy.linalg + +from gensim.corpora import mmcorpus, Dictionary +from gensim.models import lsimodel +from gensim import matutils + + +module_path = os.path.dirname(__file__) # needed because sample data files are located in the same folder +datapath = lambda fname: os.path.join(module_path, 'test_data', fname) + + +# set up vars used in testing ("Deerwester" from the web tutorial) +texts = [['human', 'interface', 'computer'], + ['survey', 'user', 'computer', 'system', 'response', 'time'], + ['eps', 'user', 'interface', 'system'], + ['system', 'human', 'system', 'eps'], + ['user', 'response', 'time'], + ['trees'], + ['graph', 'trees'], + ['graph', 'minors', 'trees'], + ['graph', 'minors', 'survey']] +dictionary = Dictionary(texts) +corpus = [dictionary.doc2bow(text) for text in texts] + + +def testfile(): + # temporary data will be stored to this file + return os.path.join(tempfile.gettempdir(), 
'gensim_models.tst') + + +class TestLsiModel(unittest.TestCase): + def setUp(self): + self.corpus = mmcorpus.MmCorpus(datapath('testcorpus.mm')) + self.model = lsimodel.LsiModel(self.corpus, num_topics=2) + + def testTransform(self): + """Test lsi[vector] transformation.""" + # create the transformation model + model = self.model + + # make sure the decomposition is enough accurate + u, s, vt = scipy.linalg.svd(matutils.corpus2dense(self.corpus, self.corpus.num_terms), full_matrices=False) + self.assertTrue(numpy.allclose(s[:2], model.projection.s)) # singular values must match + + # transform one document + doc = list(self.corpus)[0] + transformed = model[doc] + vec = matutils.sparse2full(transformed, 2) # convert to dense vector, for easier equality tests + expected = numpy.array([-0.6594664, 0.142115444]) # scaled LSI version + # expected = numpy.array([-0.1973928, 0.05591352]) # non-scaled LSI version + self.assertTrue(numpy.allclose(abs(vec), abs(expected))) # transformed entries must be equal up to sign + + + def testShowTopic(self): + topic = self.model.show_topic(1) + + for k, v in topic: + self.assertTrue(isinstance(k, six.string_types)) + self.assertTrue(isinstance(v, float)) + + + def testShowTopics(self): + topics = self.model.show_topics(formatted=False) + + for topic_no, topic in topics: + self.assertTrue(isinstance(topic_no, int)) + self.assertTrue(isinstance(topic, list)) + for k, v in topic: + self.assertTrue(isinstance(k, six.string_types)) + self.assertTrue(isinstance(v, float)) + + + def testCorpusTransform(self): + """Test lsi[corpus] transformation.""" + model = self.model + got = numpy.vstack(matutils.sparse2full(doc, 2) for doc in model[self.corpus]) + expected = numpy.array([ + [ 0.65946639, 0.14211544], + [ 2.02454305, -0.42088759], + [ 1.54655361, 0.32358921], + [ 1.81114125, 0.5890525 ], + [ 0.9336738 , -0.27138939], + [ 0.01274618, -0.49016181], + [ 0.04888203, -1.11294699], + [ 0.08063836, -1.56345594], + [ 0.27381003, -1.34694159]]) + 
self.assertTrue(numpy.allclose(abs(got), abs(expected))) # must equal up to sign + + + def testOnlineTransform(self): + corpus = list(self.corpus) + doc = corpus[0] # use the corpus' first document for testing + + # create the transformation model + model2 = lsimodel.LsiModel(corpus=corpus, num_topics=5) # compute everything at once + model = lsimodel.LsiModel(corpus=None, id2word=model2.id2word, num_topics=5) # start with no documents, we will add them later + + # train model on a single document + model.add_documents([corpus[0]]) + + # transform the testing document with this partial transformation + transformed = model[doc] + vec = matutils.sparse2full(transformed, model.num_topics) # convert to dense vector, for easier equality tests + expected = numpy.array([-1.73205078, 0.0, 0.0, 0.0, 0.0]) # scaled LSI version + self.assertTrue(numpy.allclose(abs(vec), abs(expected), atol=1e-6)) # transformed entries must be equal up to sign + + # train on another 4 documents + model.add_documents(corpus[1:5], chunksize=2) # train on 4 extra docs, in chunks of 2 documents, for the lols + + # transform a document with this partial transformation + transformed = model[doc] + vec = matutils.sparse2full(transformed, model.num_topics) # convert to dense vector, for easier equality tests + expected = numpy.array([-0.66493785, -0.28314203, -1.56376302, 0.05488682, 0.17123269]) # scaled LSI version + self.assertTrue(numpy.allclose(abs(vec), abs(expected), atol=1e-6)) # transformed entries must be equal up to sign + + # train on the rest of documents + model.add_documents(corpus[5:]) + + # make sure the final transformation is the same as if we had decomposed the whole corpus at once + vec1 = matutils.sparse2full(model[doc], model.num_topics) + vec2 = matutils.sparse2full(model2[doc], model2.num_topics) + self.assertTrue(numpy.allclose(abs(vec1), abs(vec2), atol=1e-5)) # the two LSI representations must equal up to sign + + + def testPersistence(self): + fname = testfile() + model = 
self.model + model.save(fname) + model2 = lsimodel.LsiModel.load(fname) + self.assertEqual(model.num_topics, model2.num_topics) + self.assertTrue(numpy.allclose(model.projection.u, model2.projection.u)) + self.assertTrue(numpy.allclose(model.projection.s, model2.projection.s)) + tstvec = [] + self.assertTrue(numpy.allclose(model[tstvec], model2[tstvec])) # try projecting an empty vector + + def testPersistenceCompressed(self): + fname = testfile() + '.gz' + model = self.model + model.save(fname) + model2 = lsimodel.LsiModel.load(fname, mmap=None) + self.assertEqual(model.num_topics, model2.num_topics) + self.assertTrue(numpy.allclose(model.projection.u, model2.projection.u)) + self.assertTrue(numpy.allclose(model.projection.s, model2.projection.s)) + tstvec = [] + self.assertTrue(numpy.allclose(model[tstvec], model2[tstvec])) # try projecting an empty vector + + def testLargeMmap(self): + fname = testfile() + model = self.model + + # test storing the internal arrays into separate files + model.save(fname, sep_limit=0) + + # now load the external arrays via mmap + model2 = lsimodel.LsiModel.load(fname, mmap='r') + self.assertEqual(model.num_topics, model2.num_topics) + self.assertTrue(isinstance(model2.projection.u, numpy.memmap)) + self.assertTrue(isinstance(model2.projection.s, numpy.memmap)) + self.assertTrue(numpy.allclose(model.projection.u, model2.projection.u)) + self.assertTrue(numpy.allclose(model.projection.s, model2.projection.s)) + tstvec = [] + self.assertTrue(numpy.allclose(model[tstvec], model2[tstvec])) # try projecting an empty vector + + def testLargeMmapCompressed(self): + fname = testfile() + '.gz' + model = self.model + + # test storing the internal arrays into separate files + model.save(fname, sep_limit=0) + + # now load the external arrays via mmap + return + + # turns out this test doesn't exercise this because there are no arrays + # to be mmaped! 
+ self.assertRaises(IOError, lsimodel.LsiModel.load, fname, mmap='r') + +#endclass TestLsiModel + + +if __name__ == '__main__': + logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.DEBUG) + unittest.main() diff --git a/gensim/test/test_models.py b/gensim/test/test_models.py deleted file mode 100644 index 038d5be2c2..0000000000 --- a/gensim/test/test_models.py +++ /dev/null @@ -1,557 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (C) 2010 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html - -""" -Automated tests for checking transformation algorithms (the models package). -""" - - -import logging -import unittest -import os -import os.path -import tempfile - -import numpy -import scipy.linalg - -from gensim.corpora import mmcorpus, Dictionary -from gensim.models import lsimodel, ldamodel, tfidfmodel, rpmodel, logentropy_model, ldamulticore -from gensim.models.wrappers import ldamallet -from gensim import matutils - - -module_path = os.path.dirname(__file__) # needed because sample data files are located in the same folder -datapath = lambda fname: os.path.join(module_path, 'test_data', fname) - - -# set up vars used in testing ("Deerwester" from the web tutorial) -texts = [['human', 'interface', 'computer'], - ['survey', 'user', 'computer', 'system', 'response', 'time'], - ['eps', 'user', 'interface', 'system'], - ['system', 'human', 'system', 'eps'], - ['user', 'response', 'time'], - ['trees'], - ['graph', 'trees'], - ['graph', 'minors', 'trees'], - ['graph', 'minors', 'survey']] -dictionary = Dictionary(texts) -corpus = [dictionary.doc2bow(text) for text in texts] - - -def testfile(): - # temporary data will be stored to this file - return os.path.join(tempfile.gettempdir(), 'gensim_models.tst') - - -class TestLsiModel(unittest.TestCase): - def setUp(self): - self.corpus = mmcorpus.MmCorpus(datapath('testcorpus.mm')) - - def testTransform(self): - """Test lsi[vector] 
transformation.""" - # create the transformation model - model = lsimodel.LsiModel(self.corpus, num_topics=2) - - # make sure the decomposition is enough accurate - u, s, vt = scipy.linalg.svd(matutils.corpus2dense(self.corpus, self.corpus.num_terms), full_matrices=False) - self.assertTrue(numpy.allclose(s[:2], model.projection.s)) # singular values must match - - # transform one document - doc = list(self.corpus)[0] - transformed = model[doc] - vec = matutils.sparse2full(transformed, 2) # convert to dense vector, for easier equality tests - expected = numpy.array([-0.6594664, 0.142115444]) # scaled LSI version - # expected = numpy.array([-0.1973928, 0.05591352]) # non-scaled LSI version - self.assertTrue(numpy.allclose(abs(vec), abs(expected))) # transformed entries must be equal up to sign - - - def testCorpusTransform(self): - """Test lsi[corpus] transformation.""" - model = lsimodel.LsiModel(self.corpus, num_topics=2) - got = numpy.vstack(matutils.sparse2full(doc, 2) for doc in model[self.corpus]) - expected = numpy.array([ - [ 0.65946639, 0.14211544], - [ 2.02454305, -0.42088759], - [ 1.54655361, 0.32358921], - [ 1.81114125, 0.5890525 ], - [ 0.9336738 , -0.27138939], - [ 0.01274618, -0.49016181], - [ 0.04888203, -1.11294699], - [ 0.08063836, -1.56345594], - [ 0.27381003, -1.34694159]]) - self.assertTrue(numpy.allclose(abs(got), abs(expected))) # must equal up to sign - - - def testOnlineTransform(self): - corpus = list(self.corpus) - doc = corpus[0] # use the corpus' first document for testing - - # create the transformation model - model2 = lsimodel.LsiModel(corpus=corpus, num_topics=5) # compute everything at once - model = lsimodel.LsiModel(corpus=None, id2word=model2.id2word, num_topics=5) # start with no documents, we will add them later - - # train model on a single document - model.add_documents([corpus[0]]) - - # transform the testing document with this partial transformation - transformed = model[doc] - vec = matutils.sparse2full(transformed, 
model.num_topics) # convert to dense vector, for easier equality tests - expected = numpy.array([-1.73205078, 0.0, 0.0, 0.0, 0.0]) # scaled LSI version - self.assertTrue(numpy.allclose(abs(vec), abs(expected), atol=1e-6)) # transformed entries must be equal up to sign - - # train on another 4 documents - model.add_documents(corpus[1:5], chunksize=2) # train on 4 extra docs, in chunks of 2 documents, for the lols - - # transform a document with this partial transformation - transformed = model[doc] - vec = matutils.sparse2full(transformed, model.num_topics) # convert to dense vector, for easier equality tests - expected = numpy.array([-0.66493785, -0.28314203, -1.56376302, 0.05488682, 0.17123269]) # scaled LSI version - self.assertTrue(numpy.allclose(abs(vec), abs(expected), atol=1e-6)) # transformed entries must be equal up to sign - - # train on the rest of documents - model.add_documents(corpus[5:]) - - # make sure the final transformation is the same as if we had decomposed the whole corpus at once - vec1 = matutils.sparse2full(model[doc], model.num_topics) - vec2 = matutils.sparse2full(model2[doc], model2.num_topics) - self.assertTrue(numpy.allclose(abs(vec1), abs(vec2), atol=1e-5)) # the two LSI representations must equal up to sign - - - def testPersistence(self): - fname = testfile() - model = lsimodel.LsiModel(self.corpus, num_topics=2) - model.save(fname) - model2 = lsimodel.LsiModel.load(fname) - self.assertEqual(model.num_topics, model2.num_topics) - self.assertTrue(numpy.allclose(model.projection.u, model2.projection.u)) - self.assertTrue(numpy.allclose(model.projection.s, model2.projection.s)) - tstvec = [] - self.assertTrue(numpy.allclose(model[tstvec], model2[tstvec])) # try projecting an empty vector - - def testPersistenceCompressed(self): - fname = testfile() + '.gz' - model = lsimodel.LsiModel(self.corpus, num_topics=2) - model.save(fname) - model2 = lsimodel.LsiModel.load(fname, mmap=None) - self.assertEqual(model.num_topics, model2.num_topics) 
- self.assertTrue(numpy.allclose(model.projection.u, model2.projection.u)) - self.assertTrue(numpy.allclose(model.projection.s, model2.projection.s)) - tstvec = [] - self.assertTrue(numpy.allclose(model[tstvec], model2[tstvec])) # try projecting an empty vector - - def testLargeMmap(self): - fname = testfile() - model = lsimodel.LsiModel(self.corpus, num_topics=2) - - # test storing the internal arrays into separate files - model.save(fname, sep_limit=0) - - # now load the external arrays via mmap - model2 = lsimodel.LsiModel.load(fname, mmap='r') - self.assertEqual(model.num_topics, model2.num_topics) - self.assertTrue(isinstance(model2.projection.u, numpy.memmap)) - self.assertTrue(isinstance(model2.projection.s, numpy.memmap)) - self.assertTrue(numpy.allclose(model.projection.u, model2.projection.u)) - self.assertTrue(numpy.allclose(model.projection.s, model2.projection.s)) - tstvec = [] - self.assertTrue(numpy.allclose(model[tstvec], model2[tstvec])) # try projecting an empty vector - - def testLargeMmapCompressed(self): - fname = testfile() + '.gz' - model = lsimodel.LsiModel(self.corpus, num_topics=2) - - # test storing the internal arrays into separate files - model.save(fname, sep_limit=0) - - # now load the external arrays via mmap - return - - # turns out this test doesn't exercise this because there are no arrays - # to be mmaped! 
- self.assertRaises(IOError, lsimodel.LsiModel.load, fname, mmap='r') - -#endclass TestLsiModel - - -class TestRpModel(unittest.TestCase): - def setUp(self): - self.corpus = mmcorpus.MmCorpus(datapath('testcorpus.mm')) - - def testTransform(self): - # create the transformation model - numpy.random.seed(13) # HACK; set fixed seed so that we always get the same random matrix (and can compare against expected results) - model = rpmodel.RpModel(self.corpus, num_topics=2) - - # transform one document - doc = list(self.corpus)[0] - transformed = model[doc] - vec = matutils.sparse2full(transformed, 2) # convert to dense vector, for easier equality tests - - expected = numpy.array([-0.70710677, 0.70710677]) - self.assertTrue(numpy.allclose(vec, expected)) # transformed entries must be equal up to sign - - - def testPersistence(self): - fname = testfile() - model = rpmodel.RpModel(self.corpus, num_topics=2) - model.save(fname) - model2 = rpmodel.RpModel.load(fname) - self.assertEqual(model.num_topics, model2.num_topics) - self.assertTrue(numpy.allclose(model.projection, model2.projection)) - tstvec = [] - self.assertTrue(numpy.allclose(model[tstvec], model2[tstvec])) # try projecting an empty vector - - def testPersistenceCompressed(self): - fname = testfile() + '.gz' - model = rpmodel.RpModel(self.corpus, num_topics=2) - model.save(fname) - model2 = rpmodel.RpModel.load(fname, mmap=None) - self.assertEqual(model.num_topics, model2.num_topics) - self.assertTrue(numpy.allclose(model.projection, model2.projection)) - tstvec = [] - self.assertTrue(numpy.allclose(model[tstvec], model2[tstvec])) # try projecting an empty vector -#endclass TestRpModel - - -class TestLdaModel(unittest.TestCase): - def setUp(self): - self.corpus = mmcorpus.MmCorpus(datapath('testcorpus.mm')) - self.class_ = ldamodel.LdaModel - - def testTransform(self): - passed = False - # sometimes, LDA training gets stuck at a local minimum - # in that case try re-training the model from scratch, hoping for a - 
# better random initialization - for i in range(5): # restart at most 5 times - # create the transformation model - model = self.class_(id2word=dictionary, num_topics=2, passes=100) - model.update(self.corpus) - - # transform one document - doc = list(corpus)[0] - transformed = model[doc] - - vec = matutils.sparse2full(transformed, 2) # convert to dense vector, for easier equality tests - expected = [0.13, 0.87] - passed = numpy.allclose(sorted(vec), sorted(expected), atol=1e-2) # must contain the same values, up to re-ordering - if passed: - break - logging.warning("LDA failed to converge on attempt %i (got %s, expected %s)" % - (i, sorted(vec), sorted(expected))) - self.assertTrue(passed) - - def testTopTopics(self): - # create the transformation model - model = self.class_(id2word=dictionary, num_topics=2, passes=100) - model.update(self.corpus) - - model.top_topics(self.corpus) - - def testPasses(self): - # long message includes the original error message with a custom one - self.longMessage = True - - # construct what we expect when passes aren't involved - test_rhots = list() - model = self.class_(id2word=dictionary, chunksize=1, num_topics=2) - final_rhot = lambda: pow(model.offset + (1 * model.num_updates) / model.chunksize, -model.decay) - - # generate 5 updates to test rhot on - for x in range(5): - model.update(self.corpus) - test_rhots.append(final_rhot()) - - for passes in [1, 5, 10, 50, 100]: - model = self.class_(id2word=dictionary, chunksize=1, num_topics=2, passes=passes) - self.assertEqual(final_rhot(), 1.0) - # make sure the rhot matches the test after each update - for test_rhot in test_rhots: - model.update(self.corpus) - - msg = ", ".join(map(str, [passes, model.num_updates, model.state.numdocs])) - self.assertAlmostEqual(final_rhot(), test_rhot, msg=msg) - - self.assertEqual(model.state.numdocs, len(corpus) * len(test_rhots)) - self.assertEqual(model.num_updates, len(corpus) * len(test_rhots)) - - # def testTopicSeeding(self): - # for topic 
in range(2): - # passed = False - # for i in range(5): # restart at most this many times, to mitigate LDA randomness - # # try seeding it both ways round, check you get the same - # # topics out but with which way round they are depending - # # on the way round they're seeded - # eta = numpy.ones((2, len(dictionary))) * 0.5 - # system = dictionary.token2id[u'system'] - # trees = dictionary.token2id[u'trees'] - - # # aggressively seed the word 'system', in one of the - # # two topics, 10 times higher than the other words - # eta[topic, system] *= 10.0 - - # model = self.class_(id2word=dictionary, num_topics=2, passes=200, eta=eta) - # model.update(self.corpus) - - # topics = [dict((word, p) for p, word in model.show_topic(j, topn=None)) for j in range(2)] - - # # check that the word 'system' in the topic we seeded got a high weight, - # # and the word 'trees' (the main word in the other topic) a low weight -- - # # and vice versa for the other topic (which we didn't seed with 'system') - # passed = ( - # (topics[topic][u'system'] > topics[topic][u'trees']) - # and - # (topics[1 - topic][u'system'] < topics[1 - topic][u'trees']) - # ) - # if passed: - # break - # logging.warning("LDA failed to converge on attempt %i (got %s)", i, topics) - # self.assertTrue(passed) - - def testPersistence(self): - fname = testfile() - model = self.class_(self.corpus, num_topics=2) - model.save(fname) - model2 = self.class_.load(fname) - self.assertEqual(model.num_topics, model2.num_topics) - self.assertTrue(numpy.allclose(model.expElogbeta, model2.expElogbeta)) - tstvec = [] - self.assertTrue(numpy.allclose(model[tstvec], model2[tstvec])) # try projecting an empty vector - - def testPersistenceIgnore(self): - fname = testfile() - model = ldamodel.LdaModel(self.corpus, num_topics=2) - model.save(fname, ignore='id2word') - model2 = ldamodel.LdaModel.load(fname) - self.assertTrue(model2.id2word is None) - - model.save(fname, ignore=['id2word']) - model2 = ldamodel.LdaModel.load(fname) - 
self.assertTrue(model2.id2word is None) - - def testPersistenceCompressed(self): - fname = testfile() + '.gz' - model = self.class_(self.corpus, num_topics=2) - model.save(fname) - model2 = self.class_.load(fname, mmap=None) - self.assertEqual(model.num_topics, model2.num_topics) - self.assertTrue(numpy.allclose(model.expElogbeta, model2.expElogbeta)) - tstvec = [] - self.assertTrue(numpy.allclose(model[tstvec], model2[tstvec])) # try projecting an empty vector - - def testLargeMmap(self): - fname = testfile() - model = self.class_(self.corpus, num_topics=2) - - # simulate storing large arrays separately - model.save(testfile(), sep_limit=0) - - # test loading the large model arrays with mmap - model2 = self.class_.load(testfile(), mmap='r') - self.assertEqual(model.num_topics, model2.num_topics) - self.assertTrue(isinstance(model2.expElogbeta, numpy.memmap)) - self.assertTrue(numpy.allclose(model.expElogbeta, model2.expElogbeta)) - tstvec = [] - self.assertTrue(numpy.allclose(model[tstvec], model2[tstvec])) # try projecting an empty vector - - def testLargeMmapCompressed(self): - fname = testfile() + '.gz' - model = self.class_(self.corpus, num_topics=2) - - # simulate storing large arrays separately - model.save(fname, sep_limit=0) - - # test loading the large model arrays with mmap - self.assertRaises(IOError, self.class_.load, fname, mmap='r') -#endclass TestLdaModel - - -class TestLdaMulticore(TestLdaModel): - def setUp(self): - self.corpus = mmcorpus.MmCorpus(datapath('testcorpus.mm')) - self.class_ = ldamulticore.LdaMulticore - -#endclass TestLdaMulticore - - -class TestLdaMallet(unittest.TestCase): - def setUp(self): - self.corpus = mmcorpus.MmCorpus(datapath('testcorpus.mm')) - mallet_home = os.environ.get('MALLET_HOME', None) - self.mallet_path = os.path.join(mallet_home, 'bin', 'mallet') if mallet_home else None - - def testTransform(self): - if not self.mallet_path: - return - passed = False - for i in range(5): # restart at most 5 times - # create the 
transformation model - model = ldamallet.LdaMallet(self.mallet_path, corpus, id2word=dictionary, num_topics=2, iterations=200) - - # transform one document - doc = list(corpus)[0] - transformed = model[doc] - - vec = matutils.sparse2full(transformed, 2) # convert to dense vector, for easier equality tests - expected = [0.49, 0.51] - passed = numpy.allclose(sorted(vec), sorted(expected), atol=1e-2) # must contain the same values, up to re-ordering - if passed: - break - logging.warning("LDA failed to converge on attempt %i (got %s, expected %s)" % - (i, sorted(vec), sorted(expected))) - self.assertTrue(passed) - - - def testPersistence(self): - if not self.mallet_path: - return - fname = testfile() - model = ldamallet.LdaMallet(self.mallet_path, self.corpus, num_topics=2, iterations=100) - model.save(fname) - model2 = ldamallet.LdaMallet.load(fname) - self.assertEqual(model.num_topics, model2.num_topics) - self.assertTrue(numpy.allclose(model.wordtopics, model2.wordtopics)) - tstvec = [] - self.assertTrue(numpy.allclose(model[tstvec], model2[tstvec])) # try projecting an empty vector - - def testPersistenceCompressed(self): - if not self.mallet_path: - return - fname = testfile() + '.gz' - model = ldamallet.LdaMallet(self.mallet_path, self.corpus, num_topics=2, iterations=100) - model.save(fname) - model2 = ldamallet.LdaMallet.load(fname, mmap=None) - self.assertEqual(model.num_topics, model2.num_topics) - self.assertTrue(numpy.allclose(model.wordtopics, model2.wordtopics)) - tstvec = [] - self.assertTrue(numpy.allclose(model[tstvec], model2[tstvec])) # try projecting an empty vector - - def testLargeMmap(self): - if not self.mallet_path: - return - fname = testfile() - model = ldamallet.LdaMallet(self.mallet_path, self.corpus, num_topics=2, iterations=100) - - # simulate storing large arrays separately - model.save(testfile(), sep_limit=0) - - # test loading the large model arrays with mmap - model2 = ldamodel.LdaModel.load(testfile(), mmap='r') - 
self.assertEqual(model.num_topics, model2.num_topics) - self.assertTrue(isinstance(model2.wordtopics, numpy.memmap)) - self.assertTrue(numpy.allclose(model.wordtopics, model2.wordtopics)) - tstvec = [] - self.assertTrue(numpy.allclose(model[tstvec], model2[tstvec])) # try projecting an empty vector - - def testLargeMmapCompressed(self): - if not self.mallet_path: - return - fname = testfile() + '.gz' - model = ldamallet.LdaMallet(self.mallet_path, self.corpus, num_topics=2, iterations=100) - - # simulate storing large arrays separately - model.save(fname, sep_limit=0) - - # test loading the large model arrays with mmap - self.assertRaises(IOError, ldamodel.LdaModel.load, fname, mmap='r') -#endclass TestLdaMallet - - -class TestTfidfModel(unittest.TestCase): - def setUp(self): - self.corpus = mmcorpus.MmCorpus(datapath('testcorpus.mm')) - - def testTransform(self): - # create the transformation model - model = tfidfmodel.TfidfModel(self.corpus, normalize=True) - - # transform one document - doc = list(self.corpus)[0] - transformed = model[doc] - - expected = [(0, 0.57735026918962573), (1, 0.57735026918962573), (2, 0.57735026918962573)] - self.assertTrue(numpy.allclose(transformed, expected)) - - - def testInit(self): - # create the transformation model by analyzing a corpus - # uses the global `corpus`! - model1 = tfidfmodel.TfidfModel(corpus) - - # make sure the dfs<->idfs transformation works - self.assertEqual(model1.dfs, dictionary.dfs) - self.assertEqual(model1.idfs, tfidfmodel.precompute_idfs(model1.wglobal, dictionary.dfs, len(corpus))) - - # create the transformation model by directly supplying a term->docfreq - # mapping from the global var `dictionary`. 
- model2 = tfidfmodel.TfidfModel(dictionary=dictionary) - self.assertEqual(model1.idfs, model2.idfs) - - - def testPersistence(self): - fname = testfile() - model = tfidfmodel.TfidfModel(self.corpus, normalize=True) - model.save(fname) - model2 = tfidfmodel.TfidfModel.load(fname) - self.assertTrue(model.idfs == model2.idfs) - tstvec = [] - self.assertTrue(numpy.allclose(model[tstvec], model2[tstvec])) # try projecting an empty vector - - def testPersistenceCompressed(self): - fname = testfile() + '.gz' - model = tfidfmodel.TfidfModel(self.corpus, normalize=True) - model.save(fname) - model2 = tfidfmodel.TfidfModel.load(fname, mmap=None) - self.assertTrue(model.idfs == model2.idfs) - tstvec = [] - self.assertTrue(numpy.allclose(model[tstvec], model2[tstvec])) # try projecting an empty vector -#endclass TestTfidfModel - - -class TestLogEntropyModel(unittest.TestCase): - def setUp(self): - self.corpus_small = mmcorpus.MmCorpus(datapath('test_corpus_small.mm')) - self.corpus_ok = mmcorpus.MmCorpus(datapath('test_corpus_ok.mm')) - - - def testTransform(self): - # create the transformation model - model = logentropy_model.LogEntropyModel(self.corpus_ok, normalize=False) - - # transform one document - doc = list(self.corpus_ok)[0] - transformed = model[doc] - - expected = [(0, 0.3748900964125389), - (1, 0.30730215324230725), - (3, 1.20941755462856)] - self.assertTrue(numpy.allclose(transformed, expected)) - - - def testPersistence(self): - fname = testfile() - model = logentropy_model.LogEntropyModel(self.corpus_ok, normalize=True) - model.save(fname) - model2 = logentropy_model.LogEntropyModel.load(fname) - self.assertTrue(model.entr == model2.entr) - tstvec = [] - self.assertTrue(numpy.allclose(model[tstvec], model2[tstvec])) - - def testPersistenceCompressed(self): - fname = testfile() + '.gz' - model = logentropy_model.LogEntropyModel(self.corpus_ok, normalize=True) - model.save(fname) - model2 = logentropy_model.LogEntropyModel.load(fname, mmap=None) - 
self.assertTrue(model.entr == model2.entr) - tstvec = [] - self.assertTrue(numpy.allclose(model[tstvec], model2[tstvec])) -#endclass TestLogEntropyModel - - -if __name__ == '__main__': - logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.DEBUG) - unittest.main() diff --git a/gensim/test/test_rpmodel.py b/gensim/test/test_rpmodel.py new file mode 100644 index 0000000000..299681f49f --- /dev/null +++ b/gensim/test/test_rpmodel.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (C) 2010 Radim Rehurek +# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html + +""" +Automated tests for checking transformation algorithms (the models package). +""" + + +import logging +import unittest +import os +import os.path +import tempfile + +import six +import numpy +import scipy.linalg + +from gensim.corpora import mmcorpus, Dictionary +from gensim.models import rpmodel +from gensim import matutils + +module_path = os.path.dirname(__file__) # needed because sample data files are located in the same folder +datapath = lambda fname: os.path.join(module_path, 'test_data', fname) + + +# set up vars used in testing ("Deerwester" from the web tutorial) +texts = [['human', 'interface', 'computer'], + ['survey', 'user', 'computer', 'system', 'response', 'time'], + ['eps', 'user', 'interface', 'system'], + ['system', 'human', 'system', 'eps'], + ['user', 'response', 'time'], + ['trees'], + ['graph', 'trees'], + ['graph', 'minors', 'trees'], + ['graph', 'minors', 'survey']] +dictionary = Dictionary(texts) +corpus = [dictionary.doc2bow(text) for text in texts] + + +def testfile(): + # temporary data will be stored to this file + return os.path.join(tempfile.gettempdir(), 'gensim_models.tst') + + + +class TestRpModel(unittest.TestCase): + def setUp(self): + self.corpus = mmcorpus.MmCorpus(datapath('testcorpus.mm')) + + def testTransform(self): + # create the transformation model + numpy.random.seed(13) # 
HACK; set fixed seed so that we always get the same random matrix (and can compare against expected results) + model = rpmodel.RpModel(self.corpus, num_topics=2) + + # transform one document + doc = list(self.corpus)[0] + transformed = model[doc] + vec = matutils.sparse2full(transformed, 2) # convert to dense vector, for easier equality tests + + expected = numpy.array([-0.70710677, 0.70710677]) + self.assertTrue(numpy.allclose(vec, expected)) # transformed entries must be equal up to sign + + + def testPersistence(self): + fname = testfile() + model = rpmodel.RpModel(self.corpus, num_topics=2) + model.save(fname) + model2 = rpmodel.RpModel.load(fname) + self.assertEqual(model.num_topics, model2.num_topics) + self.assertTrue(numpy.allclose(model.projection, model2.projection)) + tstvec = [] + self.assertTrue(numpy.allclose(model[tstvec], model2[tstvec])) # try projecting an empty vector + + def testPersistenceCompressed(self): + fname = testfile() + '.gz' + model = rpmodel.RpModel(self.corpus, num_topics=2) + model.save(fname) + model2 = rpmodel.RpModel.load(fname, mmap=None) + self.assertEqual(model.num_topics, model2.num_topics) + self.assertTrue(numpy.allclose(model.projection, model2.projection)) + tstvec = [] + self.assertTrue(numpy.allclose(model[tstvec], model2[tstvec])) # try projecting an empty vector +#endclass TestRpModel + + +if __name__ == '__main__': + logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.DEBUG) + unittest.main() diff --git a/gensim/test/test_summarization.py b/gensim/test/test_summarization.py index 909a3c60ac..7255252a57 100644 --- a/gensim/test/test_summarization.py +++ b/gensim/test/test_summarization.py @@ -13,11 +13,12 @@ """ import os.path +import logging import unittest from gensim import utils from gensim.corpora import Dictionary -from gensim.summarization import summarize, summarize_corpus +from gensim.summarization import summarize, summarize_corpus, keywords class 
TestSummarizationTest(unittest.TestCase): @@ -128,3 +129,22 @@ def test_corpus_summarization_ratio(self): expected_summary_length = int(len(corpus) * ratio) self.assertEqual(len(selected_docs), expected_summary_length) + + def test_keywords_runs(self): + pre_path = os.path.join(os.path.dirname(__file__), 'test_data') + + with utils.smart_open(os.path.join(pre_path, "mihalcea_tarau.txt")) as f: + text = f.read() + + kwds = keywords(text) + self.assertTrue(len(kwds.splitlines())) + + kwds_u = keywords(utils.to_unicode(text)) + self.assertTrue(len(kwds_u.splitlines())) + + kwds_lst = keywords(text, split=True) + self.assertTrue(len(kwds_lst)) + +if __name__ == '__main__': + logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.DEBUG) + unittest.main() diff --git a/gensim/test/test_tfidfmodel.py b/gensim/test/test_tfidfmodel.py new file mode 100644 index 0000000000..10e75edd78 --- /dev/null +++ b/gensim/test/test_tfidfmodel.py @@ -0,0 +1,104 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (C) 2010 Radim Rehurek +# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html + +""" +Automated tests for checking transformation algorithms (the models package). 
+""" + + +import logging +import unittest +import os +import os.path +import tempfile + +import six +import numpy +import scipy.linalg + +from gensim.corpora import mmcorpus, Dictionary +from gensim.models import tfidfmodel +from gensim import matutils + +module_path = os.path.dirname(__file__) # needed because sample data files are located in the same folder +datapath = lambda fname: os.path.join(module_path, 'test_data', fname) + + +# set up vars used in testing ("Deerwester" from the web tutorial) +texts = [['human', 'interface', 'computer'], + ['survey', 'user', 'computer', 'system', 'response', 'time'], + ['eps', 'user', 'interface', 'system'], + ['system', 'human', 'system', 'eps'], + ['user', 'response', 'time'], + ['trees'], + ['graph', 'trees'], + ['graph', 'minors', 'trees'], + ['graph', 'minors', 'survey']] +dictionary = Dictionary(texts) +corpus = [dictionary.doc2bow(text) for text in texts] + + +def testfile(): + # temporary data will be stored to this file + return os.path.join(tempfile.gettempdir(), 'gensim_models.tst') + + + +class TestTfidfModel(unittest.TestCase): + def setUp(self): + self.corpus = mmcorpus.MmCorpus(datapath('testcorpus.mm')) + + def testTransform(self): + # create the transformation model + model = tfidfmodel.TfidfModel(self.corpus, normalize=True) + + # transform one document + doc = list(self.corpus)[0] + transformed = model[doc] + + expected = [(0, 0.57735026918962573), (1, 0.57735026918962573), (2, 0.57735026918962573)] + self.assertTrue(numpy.allclose(transformed, expected)) + + + def testInit(self): + # create the transformation model by analyzing a corpus + # uses the global `corpus`! 
+ model1 = tfidfmodel.TfidfModel(corpus) + + # make sure the dfs<->idfs transformation works + self.assertEqual(model1.dfs, dictionary.dfs) + self.assertEqual(model1.idfs, tfidfmodel.precompute_idfs(model1.wglobal, dictionary.dfs, len(corpus))) + + # create the transformation model by directly supplying a term->docfreq + # mapping from the global var `dictionary`. + model2 = tfidfmodel.TfidfModel(dictionary=dictionary) + self.assertEqual(model1.idfs, model2.idfs) + + + def testPersistence(self): + fname = testfile() + model = tfidfmodel.TfidfModel(self.corpus, normalize=True) + model.save(fname) + model2 = tfidfmodel.TfidfModel.load(fname) + self.assertTrue(model.idfs == model2.idfs) + tstvec = [] + self.assertTrue(numpy.allclose(model[tstvec], model2[tstvec])) # try projecting an empty vector + + def testPersistenceCompressed(self): + fname = testfile() + '.gz' + model = tfidfmodel.TfidfModel(self.corpus, normalize=True) + model.save(fname) + model2 = tfidfmodel.TfidfModel.load(fname, mmap=None) + self.assertTrue(model.idfs == model2.idfs) + tstvec = [] + self.assertTrue(numpy.allclose(model[tstvec], model2[tstvec])) # try projecting an empty vector +#endclass TestTfidfModel + + + +if __name__ == '__main__': + logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.DEBUG) + unittest.main() diff --git a/gensim/test/test_word2vec.py b/gensim/test/test_word2vec.py index cf749d0a82..a1c6664c70 100644 --- a/gensim/test/test_word2vec.py +++ b/gensim/test/test_word2vec.py @@ -50,6 +50,11 @@ def testfile(): # temporary data will be stored to this file return os.path.join(tempfile.gettempdir(), 'gensim_word2vec.tst') +def rule_for_testing(word, count, min_count): + if word == "human": + return utils.RULE_DISCARD # throw out + else: + return utils.RULE_DEFAULT # apply default rule, i.e. 
min_count class TestWord2VecModel(unittest.TestCase): def testPersistence(self): @@ -58,6 +63,31 @@ def testPersistence(self): model.save(testfile()) self.models_equal(model, word2vec.Word2Vec.load(testfile())) + def testPersistenceWithConstructorRule(self): + """Test storing/loading the entire model with a vocab trimming rule passed in the constructor.""" + model = word2vec.Word2Vec(sentences, min_count=1, trim_rule=rule_for_testing) + model.save(testfile()) + self.models_equal(model, word2vec.Word2Vec.load(testfile())) + + def testRuleWithMinCount(self): + """Test that returning RULE_DEFAULT from trim_rule triggers min_count.""" + model = word2vec.Word2Vec(sentences + [["occurs_only_once"]], min_count=2, trim_rule=rule_for_testing) + self.assertTrue("human" not in model.vocab) + self.assertTrue("occurs_only_once" not in model.vocab) + self.assertTrue("interface" in model.vocab) + + def testRule(self): + """Test applying vocab trim_rule to build_vocab instead of constructor.""" + model = word2vec.Word2Vec(min_count=1) + model.build_vocab(sentences, trim_rule=rule_for_testing) + self.assertTrue("human" not in model.vocab) + + def testLambdaRule(self): + """Test that lambda trim_rule works.""" + rule = lambda word, count, min_count: utils.RULE_DISCARD if word == "human" else utils.RULE_DEFAULT + model = word2vec.Word2Vec(sentences, min_count=1, trim_rule=rule) + self.assertTrue("human" not in model.vocab) + def testPersistenceWord2VecFormat(self): """Test storing/loading the entire model in word2vec format.""" model = word2vec.Word2Vec(sentences, min_count=1) diff --git a/gensim/utils.py b/gensim/utils.py index 72ae014d8a..7e897eb795 100644 --- a/gensim/utils.py +++ b/gensim/utils.py @@ -11,6 +11,7 @@ from __future__ import with_statement import logging + logger = logging.getLogger(__name__) try: @@ -805,7 +806,6 @@ def chunkize_serial(iterable, chunksize, as_numpy=False): grouper = chunkize_serial - class InputQueue(multiprocessing.Process): def __init__(self, q, 
corpus, chunksize, maxsize, as_numpy): super(InputQueue, self).__init__() @@ -908,7 +908,7 @@ def pickle(obj, fname, protocol=2): Python 2.x and 3.x. """ - with smart_open(fname, 'wb') as fout: # 'b' for binary, needed on Windows + with smart_open(fname, 'wb') as fout: # 'b' for binary, needed on Windows _pickle.dump(obj, fout, protocol=protocol) @@ -1089,7 +1089,7 @@ def mock_data(n_items=1000, dim=1000, prob_nnz=0.5, lam=1.0): return data -def prune_vocab(vocab, min_reduce): +def prune_vocab(vocab, min_reduce, trim_rule=None): """ Remove all entries from the `vocab` dictionary with count smaller than `min_reduce`. @@ -1099,9 +1099,37 @@ def prune_vocab(vocab, min_reduce): result = 0 old_len = len(vocab) for w in list(vocab): # make a copy of dict's keys - if vocab[w] <= min_reduce: + if not keep_vocab_item(w, vocab[w], min_reduce, trim_rule): # vocab[w] <= min_reduce: result += vocab[w] del vocab[w] logger.info("pruned out %i tokens with count <=%i (before %i, after %i)", old_len - len(vocab), min_reduce, old_len, len(vocab)) return result + + +def qsize(queue): + """Return the (approximate) queue size where available; -1 where not (OS X).""" + try: + return queue.qsize() + except NotImplementedError: + # OS X doesn't support qsize + return -1 + +RULE_DEFAULT = 0 +RULE_DISCARD = 1 +RULE_KEEP = 2 + + +def keep_vocab_item(word, count, min_count, trim_rule=None): + default_res = count >= min_count + + if trim_rule is None: + return default_res + else: + rule_res = trim_rule(word, count, min_count) + if rule_res == RULE_KEEP: + return True + elif rule_res == RULE_DISCARD: + return False + else: + return default_res diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 5e4090017a..0000000000 --- a/setup.cfg +++ /dev/null @@ -1,2 +0,0 @@ -[wheel] -universal = 1 diff --git a/setup.py b/setup.py index 3270d7f401..ed53a50229 100644 --- a/setup.py +++ b/setup.py @@ -109,7 +109,7 @@ def readfile(fname): setup( name='gensim', - version='0.12.1', + 
version='0.12.2', description='Python framework for fast Vector Space Modelling', long_description=readfile('README.rst'),