diff --git a/gensim/models/count_words_inner.c b/gensim/models/count_words_inner.c new file mode 100644 index 0000000000..56389085f4 --- /dev/null +++ b/gensim/models/count_words_inner.c @@ -0,0 +1,4572 @@ +/* Generated by Cython 0.23.4 */ + +#define PY_SSIZE_T_CLEAN +#include "Python.h" +#ifndef Py_PYTHON_H + #error Python headers needed to compile C extensions, please install development version of Python. +#elif PY_VERSION_HEX < 0x02060000 || (0x03000000 <= PY_VERSION_HEX && PY_VERSION_HEX < 0x03020000) + #error Cython requires Python 2.6+ or Python 3.2+. +#else +#define CYTHON_ABI "0_23_4" +#include +#ifndef offsetof +#define offsetof(type, member) ( (size_t) & ((type*)0) -> member ) +#endif +#if !defined(WIN32) && !defined(MS_WINDOWS) + #ifndef __stdcall + #define __stdcall + #endif + #ifndef __cdecl + #define __cdecl + #endif + #ifndef __fastcall + #define __fastcall + #endif +#endif +#ifndef DL_IMPORT + #define DL_IMPORT(t) t +#endif +#ifndef DL_EXPORT + #define DL_EXPORT(t) t +#endif +#ifndef PY_LONG_LONG + #define PY_LONG_LONG LONG_LONG +#endif +#ifndef Py_HUGE_VAL + #define Py_HUGE_VAL HUGE_VAL +#endif +#ifdef PYPY_VERSION +#define CYTHON_COMPILING_IN_PYPY 1 +#define CYTHON_COMPILING_IN_CPYTHON 0 +#else +#define CYTHON_COMPILING_IN_PYPY 0 +#define CYTHON_COMPILING_IN_CPYTHON 1 +#endif +#if !defined(CYTHON_USE_PYLONG_INTERNALS) && CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x02070000 +#define CYTHON_USE_PYLONG_INTERNALS 1 +#endif +#if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX < 0x02070600 && !defined(Py_OptimizeFlag) +#define Py_OptimizeFlag 0 +#endif +#define __PYX_BUILD_PY_SSIZE_T "n" +#define CYTHON_FORMAT_SSIZE_T "z" +#if PY_MAJOR_VERSION < 3 + #define __Pyx_BUILTIN_MODULE_NAME "__builtin__" + #define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\ + PyCode_New(a+k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) + #define __Pyx_DefaultClassType PyClass_Type +#else + #define __Pyx_BUILTIN_MODULE_NAME 
"builtins" + #define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\ + PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) + #define __Pyx_DefaultClassType PyType_Type +#endif +#ifndef Py_TPFLAGS_CHECKTYPES + #define Py_TPFLAGS_CHECKTYPES 0 +#endif +#ifndef Py_TPFLAGS_HAVE_INDEX + #define Py_TPFLAGS_HAVE_INDEX 0 +#endif +#ifndef Py_TPFLAGS_HAVE_NEWBUFFER + #define Py_TPFLAGS_HAVE_NEWBUFFER 0 +#endif +#ifndef Py_TPFLAGS_HAVE_FINALIZE + #define Py_TPFLAGS_HAVE_FINALIZE 0 +#endif +#if PY_VERSION_HEX > 0x03030000 && defined(PyUnicode_KIND) + #define CYTHON_PEP393_ENABLED 1 + #define __Pyx_PyUnicode_READY(op) (likely(PyUnicode_IS_READY(op)) ?\ + 0 : _PyUnicode_Ready((PyObject *)(op))) + #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_LENGTH(u) + #define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_READ_CHAR(u, i) + #define __Pyx_PyUnicode_KIND(u) PyUnicode_KIND(u) + #define __Pyx_PyUnicode_DATA(u) PyUnicode_DATA(u) + #define __Pyx_PyUnicode_READ(k, d, i) PyUnicode_READ(k, d, i) +#else + #define CYTHON_PEP393_ENABLED 0 + #define __Pyx_PyUnicode_READY(op) (0) + #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_SIZE(u) + #define __Pyx_PyUnicode_READ_CHAR(u, i) ((Py_UCS4)(PyUnicode_AS_UNICODE(u)[i])) + #define __Pyx_PyUnicode_KIND(u) (sizeof(Py_UNICODE)) + #define __Pyx_PyUnicode_DATA(u) ((void*)PyUnicode_AS_UNICODE(u)) + #define __Pyx_PyUnicode_READ(k, d, i) ((void)(k), (Py_UCS4)(((Py_UNICODE*)d)[i])) +#endif +#if CYTHON_COMPILING_IN_PYPY + #define __Pyx_PyUnicode_Concat(a, b) PyNumber_Add(a, b) + #define __Pyx_PyUnicode_ConcatSafe(a, b) PyNumber_Add(a, b) +#else + #define __Pyx_PyUnicode_Concat(a, b) PyUnicode_Concat(a, b) + #define __Pyx_PyUnicode_ConcatSafe(a, b) ((unlikely((a) == Py_None) || unlikely((b) == Py_None)) ?\ + PyNumber_Add(a, b) : __Pyx_PyUnicode_Concat(a, b)) +#endif +#if CYTHON_COMPILING_IN_PYPY && !defined(PyUnicode_Contains) + #define PyUnicode_Contains(u, s) PySequence_Contains(u, s) +#endif 
+#define __Pyx_PyString_FormatSafe(a, b) ((unlikely((a) == Py_None)) ? PyNumber_Remainder(a, b) : __Pyx_PyString_Format(a, b)) +#define __Pyx_PyUnicode_FormatSafe(a, b) ((unlikely((a) == Py_None)) ? PyNumber_Remainder(a, b) : PyUnicode_Format(a, b)) +#if PY_MAJOR_VERSION >= 3 + #define __Pyx_PyString_Format(a, b) PyUnicode_Format(a, b) +#else + #define __Pyx_PyString_Format(a, b) PyString_Format(a, b) +#endif +#if PY_MAJOR_VERSION >= 3 + #define PyBaseString_Type PyUnicode_Type + #define PyStringObject PyUnicodeObject + #define PyString_Type PyUnicode_Type + #define PyString_Check PyUnicode_Check + #define PyString_CheckExact PyUnicode_CheckExact +#endif +#if PY_MAJOR_VERSION >= 3 + #define __Pyx_PyBaseString_Check(obj) PyUnicode_Check(obj) + #define __Pyx_PyBaseString_CheckExact(obj) PyUnicode_CheckExact(obj) +#else + #define __Pyx_PyBaseString_Check(obj) (PyString_Check(obj) || PyUnicode_Check(obj)) + #define __Pyx_PyBaseString_CheckExact(obj) (PyString_CheckExact(obj) || PyUnicode_CheckExact(obj)) +#endif +#ifndef PySet_CheckExact + #define PySet_CheckExact(obj) (Py_TYPE(obj) == &PySet_Type) +#endif +#define __Pyx_TypeCheck(obj, type) PyObject_TypeCheck(obj, (PyTypeObject *)type) +#if PY_MAJOR_VERSION >= 3 + #define PyIntObject PyLongObject + #define PyInt_Type PyLong_Type + #define PyInt_Check(op) PyLong_Check(op) + #define PyInt_CheckExact(op) PyLong_CheckExact(op) + #define PyInt_FromString PyLong_FromString + #define PyInt_FromUnicode PyLong_FromUnicode + #define PyInt_FromLong PyLong_FromLong + #define PyInt_FromSize_t PyLong_FromSize_t + #define PyInt_FromSsize_t PyLong_FromSsize_t + #define PyInt_AsLong PyLong_AsLong + #define PyInt_AS_LONG PyLong_AS_LONG + #define PyInt_AsSsize_t PyLong_AsSsize_t + #define PyInt_AsUnsignedLongMask PyLong_AsUnsignedLongMask + #define PyInt_AsUnsignedLongLongMask PyLong_AsUnsignedLongLongMask + #define PyNumber_Int PyNumber_Long +#endif +#if PY_MAJOR_VERSION >= 3 + #define PyBoolObject PyLongObject +#endif +#if 
PY_MAJOR_VERSION >= 3 && CYTHON_COMPILING_IN_PYPY + #ifndef PyUnicode_InternFromString + #define PyUnicode_InternFromString(s) PyUnicode_FromString(s) + #endif +#endif +#if PY_VERSION_HEX < 0x030200A4 + typedef long Py_hash_t; + #define __Pyx_PyInt_FromHash_t PyInt_FromLong + #define __Pyx_PyInt_AsHash_t PyInt_AsLong +#else + #define __Pyx_PyInt_FromHash_t PyInt_FromSsize_t + #define __Pyx_PyInt_AsHash_t PyInt_AsSsize_t +#endif +#if PY_MAJOR_VERSION >= 3 + #define __Pyx_PyMethod_New(func, self, klass) ((self) ? PyMethod_New(func, self) : PyInstanceMethod_New(func)) +#else + #define __Pyx_PyMethod_New(func, self, klass) PyMethod_New(func, self, klass) +#endif +#if PY_VERSION_HEX >= 0x030500B1 +#define __Pyx_PyAsyncMethodsStruct PyAsyncMethods +#define __Pyx_PyType_AsAsync(obj) (Py_TYPE(obj)->tp_as_async) +#elif CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 +typedef struct { + unaryfunc am_await; + unaryfunc am_aiter; + unaryfunc am_anext; +} __Pyx_PyAsyncMethodsStruct; +#define __Pyx_PyType_AsAsync(obj) ((__Pyx_PyAsyncMethodsStruct*) (Py_TYPE(obj)->tp_reserved)) +#else +#define __Pyx_PyType_AsAsync(obj) NULL +#endif +#ifndef CYTHON_RESTRICT + #if defined(__GNUC__) + #define CYTHON_RESTRICT __restrict__ + #elif defined(_MSC_VER) && _MSC_VER >= 1400 + #define CYTHON_RESTRICT __restrict + #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + #define CYTHON_RESTRICT restrict + #else + #define CYTHON_RESTRICT + #endif +#endif +#define __Pyx_void_to_None(void_result) ((void)(void_result), Py_INCREF(Py_None), Py_None) + +#ifndef CYTHON_INLINE + #if defined(__GNUC__) + #define CYTHON_INLINE __inline__ + #elif defined(_MSC_VER) + #define CYTHON_INLINE __inline + #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + #define CYTHON_INLINE inline + #else + #define CYTHON_INLINE + #endif +#endif + +#if defined(WIN32) || defined(MS_WINDOWS) + #define _USE_MATH_DEFINES +#endif +#include <math.h> /* supplies NAN where the C library defines it; tested just below */ +#ifdef NAN +#define __PYX_NAN() ((float) NAN) +#else +/* Fallback used when math.h does not define NAN: returns a float whose bytes are all 0xFF. That bit pattern is a NaN under IEEE-754; NOTE(review): assumes the target float format is IEEE-754. */ static 
CYTHON_INLINE float __PYX_NAN() { + float value; + memset(&value, 0xFF, sizeof(value)); + return value; +} +#endif + + +#if PY_MAJOR_VERSION >= 3 + #define __Pyx_PyNumber_Divide(x,y) PyNumber_TrueDivide(x,y) + #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceTrueDivide(x,y) +#else + #define __Pyx_PyNumber_Divide(x,y) PyNumber_Divide(x,y) + #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceDivide(x,y) +#endif + +#ifndef __PYX_EXTERN_C + #ifdef __cplusplus + #define __PYX_EXTERN_C extern "C" + #else + #define __PYX_EXTERN_C extern + #endif +#endif + +#define __PYX_HAVE__gensim__models__count_words_inner +#define __PYX_HAVE_API__gensim__models__count_words_inner +#include "string.h" +#include "stdio.h" +#include "pythread.h" +#include "stdint.h" +#include "murmurhash/MurmurHash3.h" +#include "murmurhash/MurmurHash2.h" +#ifdef _OPENMP +#include <omp.h> +#endif /* _OPENMP */ + +#ifdef PYREX_WITHOUT_ASSERTIONS +#define CYTHON_WITHOUT_ASSERTIONS +#endif + +#ifndef CYTHON_UNUSED +# if defined(__GNUC__) +# if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) +# define CYTHON_UNUSED __attribute__ ((__unused__)) +# else +# define CYTHON_UNUSED +# endif +# elif defined(__ICC) || (defined(__INTEL_COMPILER) && !defined(_MSC_VER)) +# define CYTHON_UNUSED __attribute__ ((__unused__)) +# else +# define CYTHON_UNUSED +# endif +#endif +#ifndef CYTHON_NCP_UNUSED +# if CYTHON_COMPILING_IN_CPYTHON +# define CYTHON_NCP_UNUSED +# else +# define CYTHON_NCP_UNUSED CYTHON_UNUSED +# endif +#endif +typedef struct {PyObject **p; char *s; const Py_ssize_t n; const char* encoding; + const char is_unicode; const char is_str; const char intern; } __Pyx_StringTabEntry; + +#define __PYX_DEFAULT_STRING_ENCODING_IS_ASCII 0 +#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT 0 +#define __PYX_DEFAULT_STRING_ENCODING "" +#define __Pyx_PyObject_FromString __Pyx_PyBytes_FromString +#define __Pyx_PyObject_FromStringAndSize __Pyx_PyBytes_FromStringAndSize +#define 
__Pyx_uchar_cast(c) ((unsigned char)c) +#define __Pyx_long_cast(x) ((long)x) +#define __Pyx_fits_Py_ssize_t(v, type, is_signed) (\ + (sizeof(type) < sizeof(Py_ssize_t)) ||\ + (sizeof(type) > sizeof(Py_ssize_t) &&\ + likely(v < (type)PY_SSIZE_T_MAX ||\ + v == (type)PY_SSIZE_T_MAX) &&\ + (!is_signed || likely(v > (type)PY_SSIZE_T_MIN ||\ + v == (type)PY_SSIZE_T_MIN))) ||\ + (sizeof(type) == sizeof(Py_ssize_t) &&\ + (is_signed || likely(v < (type)PY_SSIZE_T_MAX ||\ + v == (type)PY_SSIZE_T_MAX))) ) +#if defined (__cplusplus) && __cplusplus >= 201103L + #include <cstdlib> /* declares the std::abs overloads used on the next line */ + #define __Pyx_sst_abs(value) std::abs(value) +#elif SIZEOF_INT >= SIZEOF_SIZE_T + #define __Pyx_sst_abs(value) abs(value) +#elif SIZEOF_LONG >= SIZEOF_SIZE_T + #define __Pyx_sst_abs(value) labs(value) +#elif defined (_MSC_VER) && defined (_M_X64) + #define __Pyx_sst_abs(value) _abs64(value) +#elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + #define __Pyx_sst_abs(value) llabs(value) +#elif defined (__GNUC__) + #define __Pyx_sst_abs(value) __builtin_llabs(value) +#else + #define __Pyx_sst_abs(value) ((value<0) ? 
-value : value) +#endif +static CYTHON_INLINE char* __Pyx_PyObject_AsString(PyObject*); +static CYTHON_INLINE char* __Pyx_PyObject_AsStringAndSize(PyObject*, Py_ssize_t* length); +#define __Pyx_PyByteArray_FromString(s) PyByteArray_FromStringAndSize((const char*)s, strlen((const char*)s)) +#define __Pyx_PyByteArray_FromStringAndSize(s, l) PyByteArray_FromStringAndSize((const char*)s, l) +#define __Pyx_PyBytes_FromString PyBytes_FromString +#define __Pyx_PyBytes_FromStringAndSize PyBytes_FromStringAndSize +static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char*); +#if PY_MAJOR_VERSION < 3 + #define __Pyx_PyStr_FromString __Pyx_PyBytes_FromString + #define __Pyx_PyStr_FromStringAndSize __Pyx_PyBytes_FromStringAndSize +#else + #define __Pyx_PyStr_FromString __Pyx_PyUnicode_FromString + #define __Pyx_PyStr_FromStringAndSize __Pyx_PyUnicode_FromStringAndSize +#endif +#define __Pyx_PyObject_AsSString(s) ((signed char*) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_AsUString(s) ((unsigned char*) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_FromCString(s) __Pyx_PyObject_FromString((const char*)s) +#define __Pyx_PyBytes_FromCString(s) __Pyx_PyBytes_FromString((const char*)s) +#define __Pyx_PyByteArray_FromCString(s) __Pyx_PyByteArray_FromString((const char*)s) +#define __Pyx_PyStr_FromCString(s) __Pyx_PyStr_FromString((const char*)s) +#define __Pyx_PyUnicode_FromCString(s) __Pyx_PyUnicode_FromString((const char*)s) +#if PY_MAJOR_VERSION < 3 +/* Python 2 only: returns the number of Py_UNICODE elements in u before the first zero element. u must be zero-terminated; the scan stops only at a zero element. */ static CYTHON_INLINE size_t __Pyx_Py_UNICODE_strlen(const Py_UNICODE *u) +{ + const Py_UNICODE *u_end = u; + while (*u_end++) ; /* the post-increment leaves u_end one past the terminator, hence the -1 below */ + return (size_t)(u_end - u - 1); +} +#else +#define __Pyx_Py_UNICODE_strlen Py_UNICODE_strlen +#endif +#define __Pyx_PyUnicode_FromUnicode(u) PyUnicode_FromUnicode(u, __Pyx_Py_UNICODE_strlen(u)) +#define __Pyx_PyUnicode_FromUnicodeAndLength PyUnicode_FromUnicode +#define __Pyx_PyUnicode_AsUnicode PyUnicode_AsUnicode +#define __Pyx_NewRef(obj) (Py_INCREF(obj), obj) +#define 
__Pyx_Owned_Py_None(b) __Pyx_NewRef(Py_None) +#define __Pyx_PyBool_FromLong(b) ((b) ? __Pyx_NewRef(Py_True) : __Pyx_NewRef(Py_False)) +static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject*); +static CYTHON_INLINE PyObject* __Pyx_PyNumber_Int(PyObject* x); +static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject*); +static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t); +#if CYTHON_COMPILING_IN_CPYTHON +#define __pyx_PyFloat_AsDouble(x) (PyFloat_CheckExact(x) ? PyFloat_AS_DOUBLE(x) : PyFloat_AsDouble(x)) +#else +#define __pyx_PyFloat_AsDouble(x) PyFloat_AsDouble(x) +#endif +#define __pyx_PyFloat_AsFloat(x) ((float) __pyx_PyFloat_AsDouble(x)) +#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII +static int __Pyx_sys_getdefaultencoding_not_ascii; +/* ASCII variant (Python 2 with c_string_encoding=ascii). Sets __Pyx_sys_getdefaultencoding_not_ascii to 0 when sys.getdefaultencoding() is "ascii" and to 1 otherwise; in the latter case it checks that the encoding maps the 128 ASCII code points to the same bytes and raises ValueError if not. Returns 0 on success, -1 with a Python exception set on failure. */ static int __Pyx_init_sys_getdefaultencoding_params(void) { + PyObject* sys; + PyObject* default_encoding = NULL; + PyObject* ascii_chars_u = NULL; + PyObject* ascii_chars_b = NULL; + const char* default_encoding_c; + sys = PyImport_ImportModule("sys"); + if (!sys) goto bad; + default_encoding = PyObject_CallMethod(sys, (char*) "getdefaultencoding", NULL); + Py_DECREF(sys); + if (!default_encoding) goto bad; + default_encoding_c = PyBytes_AsString(default_encoding); + if (!default_encoding_c) goto bad; + if (strcmp(default_encoding_c, "ascii") == 0) { + __Pyx_sys_getdefaultencoding_not_ascii = 0; + } else { + char ascii_chars[128]; + int c; + for (c = 0; c < 128; c++) { + ascii_chars[c] = c; + } + __Pyx_sys_getdefaultencoding_not_ascii = 1; + ascii_chars_u = PyUnicode_DecodeASCII(ascii_chars, 128, NULL); + if (!ascii_chars_u) goto bad; + ascii_chars_b = PyUnicode_AsEncodedString(ascii_chars_u, default_encoding_c, NULL); + if (!ascii_chars_b || !PyBytes_Check(ascii_chars_b) || memcmp(ascii_chars, PyBytes_AS_STRING(ascii_chars_b), 128) != 0) { + PyErr_Format( + PyExc_ValueError, + "This module compiled with c_string_encoding=ascii, but default encoding '%.200s' is not a superset of ascii.", 
+ default_encoding_c); + goto bad; + } + Py_DECREF(ascii_chars_u); + Py_DECREF(ascii_chars_b); + } + Py_DECREF(default_encoding); + return 0; +bad: + Py_XDECREF(default_encoding); + Py_XDECREF(ascii_chars_u); + Py_XDECREF(ascii_chars_b); + return -1; +} +#endif +#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT && PY_MAJOR_VERSION >= 3 +#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_DecodeUTF8(c_str, size, NULL) +#else +#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_Decode(c_str, size, __PYX_DEFAULT_STRING_ENCODING, NULL) +#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT +static char* __PYX_DEFAULT_STRING_ENCODING; +/* Default-encoding variant. Copies the name returned by sys.getdefaultencoding() into a malloc'ed buffer stored in the global __PYX_DEFAULT_STRING_ENCODING. Returns 0 on success, -1 with a Python exception set on failure. */ static int __Pyx_init_sys_getdefaultencoding_params(void) { + PyObject* sys; + PyObject* default_encoding = NULL; + char* default_encoding_c; + sys = PyImport_ImportModule("sys"); + if (!sys) goto bad; + default_encoding = PyObject_CallMethod(sys, (char*) (const char*) "getdefaultencoding", NULL); + Py_DECREF(sys); + if (!default_encoding) goto bad; + default_encoding_c = PyBytes_AsString(default_encoding); + if (!default_encoding_c) goto bad; + /* strlen() excludes the terminating NUL that strcpy() writes, so reserve one extra byte */ + __PYX_DEFAULT_STRING_ENCODING = (char*) malloc(strlen(default_encoding_c)+1); + if (!__PYX_DEFAULT_STRING_ENCODING) { + PyErr_NoMemory(); /* malloc() sets no Python exception; the caller expects one on -1 */ + goto bad; + } + strcpy(__PYX_DEFAULT_STRING_ENCODING, default_encoding_c); + Py_DECREF(default_encoding); + return 0; +bad: + Py_XDECREF(default_encoding); + return -1; +} +#endif +#endif + + +/* Test for GCC > 2.95 */ +#if defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95))) + #define likely(x) __builtin_expect(!!(x), 1) + #define unlikely(x) __builtin_expect(!!(x), 0) +#else /* !__GNUC__ or GCC < 2.95 */ + #define likely(x) (x) + #define unlikely(x) (x) +#endif /* __GNUC__ */ + +static PyObject *__pyx_m; +static PyObject *__pyx_d; +static PyObject *__pyx_b; +static PyObject *__pyx_empty_tuple; +static PyObject *__pyx_empty_bytes; +static int __pyx_lineno; +static int __pyx_clineno = 0; +static const char * __pyx_cfilenm= __FILE__; +static const char 
*__pyx_filename; + + +static const char *__pyx_f[] = { + "gensim/models/count_words_inner.pyx", + ".env/lib/python2.7/site-packages/Cython/Includes/cpython/type.pxd", + ".env/lib/python2.7/site-packages/Cython/Includes/cpython/bool.pxd", + ".env/lib/python2.7/site-packages/Cython/Includes/cpython/complex.pxd", + ".env/lib/python2.7/site-packages/cymem/cymem.pxd", + ".env/lib/python2.7/site-packages/preshed/maps.pxd", + ".env/lib/python2.7/site-packages/preshed/counter.pxd", +}; + +/* "maps.pxd":5 + * + * + * ctypedef uint64_t key_t # <<<<<<<<<<<<<< + * + * + */ +typedef uint64_t __pyx_t_7preshed_4maps_key_t; + +/* "preshed/counter.pxd":10 + * + * + * ctypedef int64_t count_t # <<<<<<<<<<<<<< + * + * + */ +typedef int64_t __pyx_t_7preshed_7counter_count_t; + +/*--- Type declarations ---*/ +struct __pyx_obj_5cymem_5cymem_Pool; +struct __pyx_obj_5cymem_5cymem_Address; +struct __pyx_obj_7preshed_4maps_PreshMap; +struct __pyx_obj_7preshed_4maps_PreshMapArray; +struct __pyx_obj_7preshed_7counter_PreshCounter; +struct __pyx_t_7preshed_4maps_Cell; +struct __pyx_t_7preshed_4maps_MapStruct; + +/* "maps.pxd":8 + * + * + * cdef struct Cell: # <<<<<<<<<<<<<< + * key_t key + * void* value + */ +struct __pyx_t_7preshed_4maps_Cell { + __pyx_t_7preshed_4maps_key_t key; + void *value; +}; + +/* "maps.pxd":13 + * + * + * cdef struct MapStruct: # <<<<<<<<<<<<<< + * Cell* cells + * void* value_for_empty_key + */ +struct __pyx_t_7preshed_4maps_MapStruct { + struct __pyx_t_7preshed_4maps_Cell *cells; + void *value_for_empty_key; + void *value_for_del_key; + __pyx_t_7preshed_4maps_key_t length; + __pyx_t_7preshed_4maps_key_t filled; + int is_empty_key_set; + int is_del_key_set; +}; + +/* "cymem/cymem.pxd":1 + * cdef class Pool: # <<<<<<<<<<<<<< + * cdef readonly size_t size + * cdef readonly dict addresses + */ +struct __pyx_obj_5cymem_5cymem_Pool { + PyObject_HEAD + struct __pyx_vtabstruct_5cymem_5cymem_Pool *__pyx_vtab; + size_t size; + PyObject *addresses; + PyObject *refs; +}; + + +/* 
"cymem/cymem.pxd":11 + * + * + * cdef class Address: # <<<<<<<<<<<<<< + * cdef void* ptr + */ +struct __pyx_obj_5cymem_5cymem_Address { + PyObject_HEAD + void *ptr; +}; + + +/* "maps.pxd":36 + * + * + * cdef class PreshMap: # <<<<<<<<<<<<<< + * cdef MapStruct* c_map + * cdef Pool mem + */ +struct __pyx_obj_7preshed_4maps_PreshMap { + PyObject_HEAD + struct __pyx_vtabstruct_7preshed_4maps_PreshMap *__pyx_vtab; + struct __pyx_t_7preshed_4maps_MapStruct *c_map; + struct __pyx_obj_5cymem_5cymem_Pool *mem; +}; + + +/* "maps.pxd":44 + * + * + * cdef class PreshMapArray: # <<<<<<<<<<<<<< + * cdef Pool mem + * cdef MapStruct* maps + */ +struct __pyx_obj_7preshed_4maps_PreshMapArray { + PyObject_HEAD + struct __pyx_vtabstruct_7preshed_4maps_PreshMapArray *__pyx_vtab; + struct __pyx_obj_5cymem_5cymem_Pool *mem; + struct __pyx_t_7preshed_4maps_MapStruct *maps; + size_t length; +}; + + +/* "preshed/counter.pxd":13 + * + * + * cdef class PreshCounter: # <<<<<<<<<<<<<< + * cdef Pool mem + * cdef MapStruct* c_map + */ +struct __pyx_obj_7preshed_7counter_PreshCounter { + PyObject_HEAD + struct __pyx_vtabstruct_7preshed_7counter_PreshCounter *__pyx_vtab; + struct __pyx_obj_5cymem_5cymem_Pool *mem; + struct __pyx_t_7preshed_4maps_MapStruct *c_map; + PyObject *smoother; + __pyx_t_7preshed_7counter_count_t total; +}; + + + +/* "cymem/cymem.pxd":1 + * cdef class Pool: # <<<<<<<<<<<<<< + * cdef readonly size_t size + * cdef readonly dict addresses + */ + +struct __pyx_vtabstruct_5cymem_5cymem_Pool { + void *(*alloc)(struct __pyx_obj_5cymem_5cymem_Pool *, size_t, size_t); + void (*free)(struct __pyx_obj_5cymem_5cymem_Pool *, void *); + void *(*realloc)(struct __pyx_obj_5cymem_5cymem_Pool *, void *, size_t); +}; +static struct __pyx_vtabstruct_5cymem_5cymem_Pool *__pyx_vtabptr_5cymem_5cymem_Pool; + + +/* "maps.pxd":36 + * + * + * cdef class PreshMap: # <<<<<<<<<<<<<< + * cdef MapStruct* c_map + * cdef Pool mem + */ + +struct __pyx_vtabstruct_7preshed_4maps_PreshMap { + void *(*get)(struct 
__pyx_obj_7preshed_4maps_PreshMap *, __pyx_t_7preshed_4maps_key_t); + void (*set)(struct __pyx_obj_7preshed_4maps_PreshMap *, __pyx_t_7preshed_4maps_key_t, void *); +}; +static struct __pyx_vtabstruct_7preshed_4maps_PreshMap *__pyx_vtabptr_7preshed_4maps_PreshMap; + + +/* "maps.pxd":44 + * + * + * cdef class PreshMapArray: # <<<<<<<<<<<<<< + * cdef Pool mem + * cdef MapStruct* maps + */ + +struct __pyx_vtabstruct_7preshed_4maps_PreshMapArray { + void *(*get)(struct __pyx_obj_7preshed_4maps_PreshMapArray *, size_t, __pyx_t_7preshed_4maps_key_t); + void (*set)(struct __pyx_obj_7preshed_4maps_PreshMapArray *, size_t, __pyx_t_7preshed_4maps_key_t, void *); +}; +static struct __pyx_vtabstruct_7preshed_4maps_PreshMapArray *__pyx_vtabptr_7preshed_4maps_PreshMapArray; + + +/* "preshed/counter.pxd":13 + * + * + * cdef class PreshCounter: # <<<<<<<<<<<<<< + * cdef Pool mem + * cdef MapStruct* c_map + */ + +struct __pyx_vtabstruct_7preshed_7counter_PreshCounter { + int (*inc)(struct __pyx_obj_7preshed_7counter_PreshCounter *, __pyx_t_7preshed_4maps_key_t, __pyx_t_7preshed_7counter_count_t, int __pyx_skip_dispatch); +}; +static struct __pyx_vtabstruct_7preshed_7counter_PreshCounter *__pyx_vtabptr_7preshed_7counter_PreshCounter; + +/* --- Runtime support code (head) --- */ +#ifndef CYTHON_REFNANNY + #define CYTHON_REFNANNY 0 +#endif +#if CYTHON_REFNANNY + typedef struct { + void (*INCREF)(void*, PyObject*, int); + void (*DECREF)(void*, PyObject*, int); + void (*GOTREF)(void*, PyObject*, int); + void (*GIVEREF)(void*, PyObject*, int); + void* (*SetupContext)(const char*, int, const char*); + void (*FinishContext)(void**); + } __Pyx_RefNannyAPIStruct; + static __Pyx_RefNannyAPIStruct *__Pyx_RefNanny = NULL; + static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname); + #define __Pyx_RefNannyDeclarations void *__pyx_refnanny = NULL; +#ifdef WITH_THREAD + #define __Pyx_RefNannySetupContext(name, acquire_gil)\ + if (acquire_gil) {\ + PyGILState_STATE 
__pyx_gilstate_save = PyGILState_Ensure();\ + __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__);\ + PyGILState_Release(__pyx_gilstate_save);\ + } else {\ + __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__);\ + } +#else + #define __Pyx_RefNannySetupContext(name, acquire_gil)\ + __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__) +#endif + #define __Pyx_RefNannyFinishContext()\ + __Pyx_RefNanny->FinishContext(&__pyx_refnanny) + #define __Pyx_INCREF(r) __Pyx_RefNanny->INCREF(__pyx_refnanny, (PyObject *)(r), __LINE__) + #define __Pyx_DECREF(r) __Pyx_RefNanny->DECREF(__pyx_refnanny, (PyObject *)(r), __LINE__) + #define __Pyx_GOTREF(r) __Pyx_RefNanny->GOTREF(__pyx_refnanny, (PyObject *)(r), __LINE__) + #define __Pyx_GIVEREF(r) __Pyx_RefNanny->GIVEREF(__pyx_refnanny, (PyObject *)(r), __LINE__) + #define __Pyx_XINCREF(r) do { if((r) != NULL) {__Pyx_INCREF(r); }} while(0) + #define __Pyx_XDECREF(r) do { if((r) != NULL) {__Pyx_DECREF(r); }} while(0) + #define __Pyx_XGOTREF(r) do { if((r) != NULL) {__Pyx_GOTREF(r); }} while(0) + #define __Pyx_XGIVEREF(r) do { if((r) != NULL) {__Pyx_GIVEREF(r);}} while(0) +#else + #define __Pyx_RefNannyDeclarations + #define __Pyx_RefNannySetupContext(name, acquire_gil) + #define __Pyx_RefNannyFinishContext() + #define __Pyx_INCREF(r) Py_INCREF(r) + #define __Pyx_DECREF(r) Py_DECREF(r) + #define __Pyx_GOTREF(r) + #define __Pyx_GIVEREF(r) + #define __Pyx_XINCREF(r) Py_XINCREF(r) + #define __Pyx_XDECREF(r) Py_XDECREF(r) + #define __Pyx_XGOTREF(r) + #define __Pyx_XGIVEREF(r) +#endif +#define __Pyx_XDECREF_SET(r, v) do {\ + PyObject *tmp = (PyObject *) r;\ + r = v; __Pyx_XDECREF(tmp);\ + } while (0) +#define __Pyx_DECREF_SET(r, v) do {\ + PyObject *tmp = (PyObject *) r;\ + r = v; __Pyx_DECREF(tmp);\ + } while (0) +#define __Pyx_CLEAR(r) do { PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);} while(0) +#define __Pyx_XCLEAR(r) do { if((r) != NULL) {PyObject* tmp = 
((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);}} while(0) + +#if CYTHON_COMPILING_IN_CPYTHON +/* CPython-only attribute lookup: calls the type's tp_getattro slot directly when it is set; on Python 2 it otherwise tries tp_getattr with the C string of attr_name; if neither slot is set it defers to PyObject_GetAttr. Returns whatever the chosen lookup returns. */ static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStr(PyObject* obj, PyObject* attr_name) { + PyTypeObject* tp = Py_TYPE(obj); + if (likely(tp->tp_getattro)) + return tp->tp_getattro(obj, attr_name); +#if PY_MAJOR_VERSION < 3 + if (likely(tp->tp_getattr)) + return tp->tp_getattr(obj, PyString_AS_STRING(attr_name)); +#endif + return PyObject_GetAttr(obj, attr_name); +} +#else +#define __Pyx_PyObject_GetAttrStr(o,n) PyObject_GetAttr(o,n) +#endif + +static PyObject *__Pyx_GetBuiltinName(PyObject *name); + +static CYTHON_INLINE int __Pyx_ArgTypeTest(PyObject *obj, PyTypeObject *type, int none_allowed, + const char *name, int exact); + +static void __Pyx_RaiseArgtupleInvalid(const char* func_name, int exact, + Py_ssize_t num_min, Py_ssize_t num_max, Py_ssize_t num_found); + +static void __Pyx_RaiseDoubleKeywordsError(const char* func_name, PyObject* kw_name); + +static int __Pyx_ParseOptionalKeywords(PyObject *kwds, PyObject **argnames[],\ + PyObject *kwds2, PyObject *values[], Py_ssize_t num_pos_args,\ + const char* function_name); + +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw); +#else +#define __Pyx_PyObject_Call(func, arg, kw) PyObject_Call(func, arg, kw) +#endif + +#if CYTHON_COMPILING_IN_CPYTHON +static PyObject* __Pyx_PyInt_AddObjC(PyObject *op1, PyObject *op2, long intval, int inplace); +#else +#define __Pyx_PyInt_AddObjC(op1, op2, intval, inplace)\ + (inplace ? 
PyNumber_InPlaceAdd(op1, op2) : PyNumber_Add(op1, op2)) +#endif + +#if CYTHON_COMPILING_IN_CPYTHON +static PyObject* __Pyx_PyInt_EqObjC(PyObject *op1, PyObject *op2, long intval, int inplace); +#else +#define __Pyx_PyInt_EqObjC(op1, op2, intval, inplace)\ + PyObject_RichCompare(op1, op2, Py_EQ) + #endif + +static CYTHON_INLINE void __Pyx_ErrRestore(PyObject *type, PyObject *value, PyObject *tb); +static CYTHON_INLINE void __Pyx_ErrFetch(PyObject **type, PyObject **value, PyObject **tb); + +static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause); + +#define __Pyx_GetItemInt(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\ + (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\ + __Pyx_GetItemInt_Fast(o, (Py_ssize_t)i, is_list, wraparound, boundscheck) :\ + (is_list ? (PyErr_SetString(PyExc_IndexError, "list index out of range"), (PyObject*)NULL) :\ + __Pyx_GetItemInt_Generic(o, to_py_func(i)))) +#define __Pyx_GetItemInt_List(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\ + (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\ + __Pyx_GetItemInt_List_Fast(o, (Py_ssize_t)i, wraparound, boundscheck) :\ + (PyErr_SetString(PyExc_IndexError, "list index out of range"), (PyObject*)NULL)) +static CYTHON_INLINE PyObject *__Pyx_GetItemInt_List_Fast(PyObject *o, Py_ssize_t i, + int wraparound, int boundscheck); +#define __Pyx_GetItemInt_Tuple(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\ + (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\ + __Pyx_GetItemInt_Tuple_Fast(o, (Py_ssize_t)i, wraparound, boundscheck) :\ + (PyErr_SetString(PyExc_IndexError, "tuple index out of range"), (PyObject*)NULL)) +static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Tuple_Fast(PyObject *o, Py_ssize_t i, + int wraparound, int boundscheck); +static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Generic(PyObject *o, PyObject* j); +static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Fast(PyObject *o, Py_ssize_t i, + int is_list, int 
wraparound, int boundscheck); + +static CYTHON_INLINE PyObject *__Pyx_GetModuleGlobalName(PyObject *name); + +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg); +#endif + +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg); + +static CYTHON_INLINE void __Pyx_RaiseTooManyValuesError(Py_ssize_t expected); + +static CYTHON_INLINE void __Pyx_RaiseNeedMoreValuesError(Py_ssize_t index); + +static CYTHON_INLINE int __Pyx_IterFinish(void); + +static int __Pyx_IternextUnpackEndCheck(PyObject *retval, Py_ssize_t expected); + +static void* __Pyx_GetVtable(PyObject *dict); + +static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level); + +static PyObject* __Pyx_ImportFrom(PyObject* module, PyObject* name); + +typedef struct { + int code_line; + PyCodeObject* code_object; +} __Pyx_CodeObjectCacheEntry; +struct __Pyx_CodeObjectCache { + int count; + int max_count; + __Pyx_CodeObjectCacheEntry* entries; +}; +static struct __Pyx_CodeObjectCache __pyx_code_cache = {0,0,NULL}; +static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line); +static PyCodeObject *__pyx_find_code_object(int code_line); +static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object); + +static void __Pyx_AddTraceback(const char *funcname, int c_line, + int py_line, const char *filename); + +static CYTHON_INLINE int64_t __Pyx_PyInt_As_int64_t(PyObject *); + +static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *); + +static CYTHON_INLINE PyObject* __Pyx_PyInt_From_uint64_t(uint64_t value); + +static CYTHON_INLINE PyObject* __Pyx_PyInt_From_int(int value); + +static CYTHON_INLINE uint64_t __Pyx_PyInt_As_uint64_t(PyObject *); + +static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value); + +static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *); + +static int __Pyx_check_binary_version(void); + +#if 
!defined(__Pyx_PyIdentifier_FromString) +#if PY_MAJOR_VERSION < 3 + #define __Pyx_PyIdentifier_FromString(s) PyString_FromString(s) +#else + #define __Pyx_PyIdentifier_FromString(s) PyUnicode_FromString(s) +#endif +#endif + +static PyObject *__Pyx_ImportModule(const char *name); + +static PyTypeObject *__Pyx_ImportType(const char *module_name, const char *class_name, size_t size, int strict); + +static int __Pyx_ImportFunction(PyObject *module, const char *funcname, void (**f)(void), const char *sig); + +static int __Pyx_InitStrings(__Pyx_StringTabEntry *t); + + +/* Module declarations from 'cpython.version' */ + +/* Module declarations from '__builtin__' */ + +/* Module declarations from 'cpython.type' */ +static PyTypeObject *__pyx_ptype_7cpython_4type_type = 0; + +/* Module declarations from 'libc.string' */ + +/* Module declarations from 'libc.stdio' */ + +/* Module declarations from 'cpython.object' */ + +/* Module declarations from 'cpython.ref' */ + +/* Module declarations from 'cpython.exc' */ + +/* Module declarations from 'cpython.module' */ + +/* Module declarations from 'cpython.mem' */ + +/* Module declarations from 'cpython.tuple' */ + +/* Module declarations from 'cpython.list' */ + +/* Module declarations from 'cpython.sequence' */ + +/* Module declarations from 'cpython.mapping' */ + +/* Module declarations from 'cpython.iterator' */ + +/* Module declarations from 'cpython.number' */ + +/* Module declarations from 'cpython.int' */ + +/* Module declarations from '__builtin__' */ + +/* Module declarations from 'cpython.bool' */ +static PyTypeObject *__pyx_ptype_7cpython_4bool_bool = 0; + +/* Module declarations from 'cpython.long' */ + +/* Module declarations from 'cpython.float' */ + +/* Module declarations from '__builtin__' */ + +/* Module declarations from 'cpython.complex' */ +static PyTypeObject *__pyx_ptype_7cpython_7complex_complex = 0; + +/* Module declarations from 'cpython.string' */ + +/* Module declarations from 'cpython.unicode' */ + 
+/* Module declarations from 'cpython.dict' */ + +/* Module declarations from 'cpython.instance' */ + +/* Module declarations from 'cpython.function' */ + +/* Module declarations from 'cpython.method' */ + +/* Module declarations from 'cpython.weakref' */ + +/* Module declarations from 'cpython.getargs' */ + +/* Module declarations from 'cpython.pythread' */ + +/* Module declarations from 'cpython.pystate' */ + +/* Module declarations from 'cpython.cobject' */ + +/* Module declarations from 'cpython.oldbuffer' */ + +/* Module declarations from 'cpython.set' */ + +/* Module declarations from 'cpython.buffer' */ + +/* Module declarations from 'cpython.bytes' */ + +/* Module declarations from 'cpython.pycapsule' */ + +/* Module declarations from 'cpython' */ + +/* Module declarations from 'libc.stdint' */ + +/* Module declarations from 'murmurhash.mrmr' */ +static uint64_t (*__pyx_f_10murmurhash_4mrmr_hash64)(void *, int, uint64_t); /*proto*/ + +/* Module declarations from 'cymem.cymem' */ +static PyTypeObject *__pyx_ptype_5cymem_5cymem_Pool = 0; +static PyTypeObject *__pyx_ptype_5cymem_5cymem_Address = 0; + +/* Module declarations from 'preshed.maps' */ +static PyTypeObject *__pyx_ptype_7preshed_4maps_PreshMap = 0; +static PyTypeObject *__pyx_ptype_7preshed_4maps_PreshMapArray = 0; +static void *(*__pyx_f_7preshed_4maps_map_get)(struct __pyx_t_7preshed_4maps_MapStruct const *, __pyx_t_7preshed_4maps_key_t const ); /*proto*/ +static void (*__pyx_f_7preshed_4maps_map_set)(struct __pyx_obj_5cymem_5cymem_Pool *, struct __pyx_t_7preshed_4maps_MapStruct *, __pyx_t_7preshed_4maps_key_t, void *); /*proto*/ +static void (*__pyx_f_7preshed_4maps_map_init)(struct __pyx_obj_5cymem_5cymem_Pool *, struct __pyx_t_7preshed_4maps_MapStruct *, size_t); /*proto*/ + +/* Module declarations from 'preshed.counter' */ +static PyTypeObject *__pyx_ptype_7preshed_7counter_PreshCounter = 0; + +/* Module declarations from 'gensim.models.count_words_inner' */ +static uint64_t 
__pyx_f_6gensim_6models_17count_words_inner__hash_string(PyObject *, int __pyx_skip_dispatch); /*proto*/ +static uint64_t __pyx_f_6gensim_6models_17count_words_inner__hash_bytes(PyObject *, int __pyx_skip_dispatch); /*proto*/ +#define __Pyx_MODULE_NAME "gensim.models.count_words_inner" +int __pyx_module_is_main_gensim__models__count_words_inner = 0; + +/* Implementation of 'gensim.models.count_words_inner' */ +static PyObject *__pyx_builtin_enumerate; +static PyObject *__pyx_builtin_TypeError; +static char __pyx_k_key[] = "key"; +static char __pyx_k_six[] = "six"; +static char __pyx_k_main[] = "__main__"; +static char __pyx_k_test[] = "__test__"; +static char __pyx_k_word[] = "word"; +static char __pyx_k_count[] = "count"; +static char __pyx_k_vocab[] = "vocab"; +static char __pyx_k_counts[] = "counts"; +static char __pyx_k_import[] = "__import__"; +static char __pyx_k_strings[] = "strings"; +static char __pyx_k_min_freq[] = "min_freq"; +static char __pyx_k_sentence[] = "sentence"; +static char __pyx_k_TypeError[] = "TypeError"; +static char __pyx_k_enumerate[] = "enumerate"; +static char __pyx_k_iteritems[] = "iteritems"; +static char __pyx_k_sentences[] = "sentences"; +static char __pyx_k_pyx_vtable[] = "__pyx_vtable__"; +static char __pyx_k_collections[] = "collections"; +static char __pyx_k_defaultdict[] = "defaultdict"; +static char __pyx_k_sentence_no[] = "sentence_no"; +static char __pyx_k_total_words[] = "total_words"; +static char __pyx_k_log_progress[] = "log_progress"; +static char __pyx_k_progress_per[] = "progress_per"; +static char __pyx_k_count_words_fast[] = "count_words_fast"; +static char __pyx_k_Users_matt_repos_gensim_gensim[] = "/Users/matt/repos/gensim/gensim/models/count_words_inner.pyx"; +static char __pyx_k_gensim_models_count_words_inner[] = "gensim.models.count_words_inner"; +static PyObject *__pyx_n_s_TypeError; +static PyObject *__pyx_kp_s_Users_matt_repos_gensim_gensim; +static PyObject *__pyx_n_s_collections; +static PyObject 
*__pyx_n_s_count; +static PyObject *__pyx_n_s_count_words_fast; +static PyObject *__pyx_n_s_counts; +static PyObject *__pyx_n_s_defaultdict; +static PyObject *__pyx_n_s_enumerate; +static PyObject *__pyx_n_s_gensim_models_count_words_inner; +static PyObject *__pyx_n_s_import; +static PyObject *__pyx_n_s_iteritems; +static PyObject *__pyx_n_s_key; +static PyObject *__pyx_n_s_log_progress; +static PyObject *__pyx_n_s_main; +static PyObject *__pyx_n_s_min_freq; +static PyObject *__pyx_n_s_progress_per; +static PyObject *__pyx_n_s_pyx_vtable; +static PyObject *__pyx_n_s_sentence; +static PyObject *__pyx_n_s_sentence_no; +static PyObject *__pyx_n_s_sentences; +static PyObject *__pyx_n_s_six; +static PyObject *__pyx_n_s_strings; +static PyObject *__pyx_n_s_test; +static PyObject *__pyx_n_s_total_words; +static PyObject *__pyx_n_s_vocab; +static PyObject *__pyx_n_s_word; +static PyObject *__pyx_pf_6gensim_6models_17count_words_inner__hash_string(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_string); /* proto */ +static PyObject *__pyx_pf_6gensim_6models_17count_words_inner_2_hash_bytes(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_string); /* proto */ +static PyObject *__pyx_pf_6gensim_6models_17count_words_inner_4count_words_fast(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_sentences, __pyx_t_7preshed_7counter_count_t __pyx_v_min_freq, int __pyx_v_progress_per, PyObject *__pyx_v_log_progress); /* proto */ +static PyObject *__pyx_int_0; +static PyObject *__pyx_int_1; +static PyObject *__pyx_int_neg_1; +static PyObject *__pyx_tuple_; +static PyObject *__pyx_codeobj__2; + +/* "gensim/models/count_words_inner.pyx":20 + * + * + * cpdef uint64_t _hash_string(unicode string) except 0: # <<<<<<<<<<<<<< + * # This code is copied from spacy.strings. The implementation took some thought, + * # and consultation with Stefan Behnel. Do not change blindly. 
Interaction + */ + +static PyObject *__pyx_pw_6gensim_6models_17count_words_inner_1_hash_string(PyObject *__pyx_self, PyObject *__pyx_v_string); /*proto*/ +static uint64_t __pyx_f_6gensim_6models_17count_words_inner__hash_string(PyObject *__pyx_v_string, CYTHON_UNUSED int __pyx_skip_dispatch) { + char *__pyx_v_chars; + Py_ssize_t __pyx_v_size; + uint64_t __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("_hash_string", 0); + + /* "gensim/models/count_words_inner.pyx":24 + * # and consultation with Stefan Behnel. Do not change blindly. Interaction + * # with Python 2/3 is subtle. + * chars = PyUnicode_AS_DATA(string) # <<<<<<<<<<<<<< + * size = PyUnicode_GET_DATA_SIZE(string) + * return hash64(chars, size, 1) + */ + __pyx_v_chars = ((char *)PyUnicode_AS_DATA(__pyx_v_string)); + + /* "gensim/models/count_words_inner.pyx":25 + * # with Python 2/3 is subtle. + * chars = PyUnicode_AS_DATA(string) + * size = PyUnicode_GET_DATA_SIZE(string) # <<<<<<<<<<<<<< + * return hash64(chars, size, 1) + * + */ + __pyx_v_size = PyUnicode_GET_DATA_SIZE(__pyx_v_string); + + /* "gensim/models/count_words_inner.pyx":26 + * chars = PyUnicode_AS_DATA(string) + * size = PyUnicode_GET_DATA_SIZE(string) + * return hash64(chars, size, 1) # <<<<<<<<<<<<<< + * + * + */ + __pyx_r = __pyx_f_10murmurhash_4mrmr_hash64(__pyx_v_chars, __pyx_v_size, 1); + goto __pyx_L0; + + /* "gensim/models/count_words_inner.pyx":20 + * + * + * cpdef uint64_t _hash_string(unicode string) except 0: # <<<<<<<<<<<<<< + * # This code is copied from spacy.strings. The implementation took some thought, + * # and consultation with Stefan Behnel. Do not change blindly. 
Interaction + */ + + /* function exit code */ + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static PyObject *__pyx_pw_6gensim_6models_17count_words_inner_1_hash_string(PyObject *__pyx_self, PyObject *__pyx_v_string); /*proto*/ +static PyObject *__pyx_pw_6gensim_6models_17count_words_inner_1_hash_string(PyObject *__pyx_self, PyObject *__pyx_v_string) { + CYTHON_UNUSED int __pyx_lineno = 0; + CYTHON_UNUSED const char *__pyx_filename = NULL; + CYTHON_UNUSED int __pyx_clineno = 0; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("_hash_string (wrapper)", 0); + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_string), (&PyUnicode_Type), 1, "string", 1))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 20; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_r = __pyx_pf_6gensim_6models_17count_words_inner__hash_string(__pyx_self, ((PyObject*)__pyx_v_string)); + + /* function exit code */ + goto __pyx_L0; + __pyx_L1_error:; + __pyx_r = NULL; + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_6gensim_6models_17count_words_inner__hash_string(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_string) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + uint64_t __pyx_t_1; + PyObject *__pyx_t_2 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("_hash_string", 0); + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = __pyx_f_6gensim_6models_17count_words_inner__hash_string(__pyx_v_string, 0); if (unlikely(__pyx_t_1 == 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 20; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_From_uint64_t(__pyx_t_1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 20; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); + __pyx_r = __pyx_t_2; + __pyx_t_2 = 0; + goto __pyx_L0; + + /* 
function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_2); + __Pyx_AddTraceback("gensim.models.count_words_inner._hash_string", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "gensim/models/count_words_inner.pyx":29 + * + * + * cpdef uint64_t _hash_bytes(bytes string) except 0: # <<<<<<<<<<<<<< + * chars = string + * return hash64(chars, len(string), 1) + */ + +static PyObject *__pyx_pw_6gensim_6models_17count_words_inner_3_hash_bytes(PyObject *__pyx_self, PyObject *__pyx_v_string); /*proto*/ +static uint64_t __pyx_f_6gensim_6models_17count_words_inner__hash_bytes(PyObject *__pyx_v_string, CYTHON_UNUSED int __pyx_skip_dispatch) { + char *__pyx_v_chars; + uint64_t __pyx_r; + __Pyx_RefNannyDeclarations + char *__pyx_t_1; + Py_ssize_t __pyx_t_2; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("_hash_bytes", 0); + + /* "gensim/models/count_words_inner.pyx":30 + * + * cpdef uint64_t _hash_bytes(bytes string) except 0: + * chars = string # <<<<<<<<<<<<<< + * return hash64(chars, len(string), 1) + * + */ + __pyx_t_1 = __Pyx_PyObject_AsString(__pyx_v_string); if (unlikely((!__pyx_t_1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 30; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v_chars = ((char *)__pyx_t_1); + + /* "gensim/models/count_words_inner.pyx":31 + * cpdef uint64_t _hash_bytes(bytes string) except 0: + * chars = string + * return hash64(chars, len(string), 1) # <<<<<<<<<<<<<< + * + * + */ + if (unlikely(__pyx_v_string == Py_None)) { + PyErr_SetString(PyExc_TypeError, "object of type 'NoneType' has no len()"); + {__pyx_filename = __pyx_f[0]; __pyx_lineno = 31; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } + __pyx_t_2 = PyBytes_GET_SIZE(__pyx_v_string); if (unlikely(__pyx_t_2 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 31; 
__pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_r = __pyx_f_10murmurhash_4mrmr_hash64(__pyx_v_chars, __pyx_t_2, 1); + goto __pyx_L0; + + /* "gensim/models/count_words_inner.pyx":29 + * + * + * cpdef uint64_t _hash_bytes(bytes string) except 0: # <<<<<<<<<<<<<< + * chars = string + * return hash64(chars, len(string), 1) + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_AddTraceback("gensim.models.count_words_inner._hash_bytes", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static PyObject *__pyx_pw_6gensim_6models_17count_words_inner_3_hash_bytes(PyObject *__pyx_self, PyObject *__pyx_v_string); /*proto*/ +static PyObject *__pyx_pw_6gensim_6models_17count_words_inner_3_hash_bytes(PyObject *__pyx_self, PyObject *__pyx_v_string) { + CYTHON_UNUSED int __pyx_lineno = 0; + CYTHON_UNUSED const char *__pyx_filename = NULL; + CYTHON_UNUSED int __pyx_clineno = 0; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("_hash_bytes (wrapper)", 0); + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_string), (&PyBytes_Type), 1, "string", 1))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 29; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_r = __pyx_pf_6gensim_6models_17count_words_inner_2_hash_bytes(__pyx_self, ((PyObject*)__pyx_v_string)); + + /* function exit code */ + goto __pyx_L0; + __pyx_L1_error:; + __pyx_r = NULL; + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_6gensim_6models_17count_words_inner_2_hash_bytes(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_string) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + uint64_t __pyx_t_1; + PyObject *__pyx_t_2 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("_hash_bytes", 0); + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = 
__pyx_f_6gensim_6models_17count_words_inner__hash_bytes(__pyx_v_string, 0); if (unlikely(__pyx_t_1 == 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 29; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_From_uint64_t(__pyx_t_1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 29; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); + __pyx_r = __pyx_t_2; + __pyx_t_2 = 0; + goto __pyx_L0; + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_2); + __Pyx_AddTraceback("gensim.models.count_words_inner._hash_bytes", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "gensim/models/count_words_inner.pyx":34 + * + * + * def count_words_fast(sentences, count_t min_freq, int progress_per, log_progress): # <<<<<<<<<<<<<< + * cdef PreshCounter counts = PreshCounter() + * strings = {} + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_6gensim_6models_17count_words_inner_5count_words_fast(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ +static PyMethodDef __pyx_mdef_6gensim_6models_17count_words_inner_5count_words_fast = {"count_words_fast", (PyCFunction)__pyx_pw_6gensim_6models_17count_words_inner_5count_words_fast, METH_VARARGS|METH_KEYWORDS, 0}; +static PyObject *__pyx_pw_6gensim_6models_17count_words_inner_5count_words_fast(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { + PyObject *__pyx_v_sentences = 0; + __pyx_t_7preshed_7counter_count_t __pyx_v_min_freq; + int __pyx_v_progress_per; + PyObject *__pyx_v_log_progress = 0; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("count_words_fast (wrapper)", 0); + { + static PyObject **__pyx_pyargnames[] = 
{&__pyx_n_s_sentences,&__pyx_n_s_min_freq,&__pyx_n_s_progress_per,&__pyx_n_s_log_progress,0}; + PyObject* values[4] = {0,0,0,0}; + if (unlikely(__pyx_kwds)) { + Py_ssize_t kw_args; + const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args); + switch (pos_args) { + case 4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3); + case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2); + case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); + case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); + case 0: break; + default: goto __pyx_L5_argtuple_error; + } + kw_args = PyDict_Size(__pyx_kwds); + switch (pos_args) { + case 0: + if (likely((values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_sentences)) != 0)) kw_args--; + else goto __pyx_L5_argtuple_error; + case 1: + if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_min_freq)) != 0)) kw_args--; + else { + __Pyx_RaiseArgtupleInvalid("count_words_fast", 1, 4, 4, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + case 2: + if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_progress_per)) != 0)) kw_args--; + else { + __Pyx_RaiseArgtupleInvalid("count_words_fast", 1, 4, 4, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + case 3: + if (likely((values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_log_progress)) != 0)) kw_args--; + else { + __Pyx_RaiseArgtupleInvalid("count_words_fast", 1, 4, 4, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + } + if (unlikely(kw_args > 0)) { + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "count_words_fast") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + } else if (PyTuple_GET_SIZE(__pyx_args) != 4) { + goto __pyx_L5_argtuple_error; + } else { + values[0] = PyTuple_GET_ITEM(__pyx_args, 0); + values[1] = 
PyTuple_GET_ITEM(__pyx_args, 1); + values[2] = PyTuple_GET_ITEM(__pyx_args, 2); + values[3] = PyTuple_GET_ITEM(__pyx_args, 3); + } + __pyx_v_sentences = values[0]; + __pyx_v_min_freq = __Pyx_PyInt_As_int64_t(values[1]); if (unlikely((__pyx_v_min_freq == (int64_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_progress_per = __Pyx_PyInt_As_int(values[2]); if (unlikely((__pyx_v_progress_per == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_log_progress = values[3]; + } + goto __pyx_L4_argument_unpacking_done; + __pyx_L5_argtuple_error:; + __Pyx_RaiseArgtupleInvalid("count_words_fast", 1, 4, 4, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_L3_error:; + __Pyx_AddTraceback("gensim.models.count_words_inner.count_words_fast", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_RefNannyFinishContext(); + return NULL; + __pyx_L4_argument_unpacking_done:; + __pyx_r = __pyx_pf_6gensim_6models_17count_words_inner_4count_words_fast(__pyx_self, __pyx_v_sentences, __pyx_v_min_freq, __pyx_v_progress_per, __pyx_v_log_progress); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_6gensim_6models_17count_words_inner_4count_words_fast(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_sentences, __pyx_t_7preshed_7counter_count_t __pyx_v_min_freq, int __pyx_v_progress_per, PyObject *__pyx_v_log_progress) { + struct __pyx_obj_7preshed_7counter_PreshCounter *__pyx_v_counts = 0; + PyObject *__pyx_v_strings = NULL; + PyObject *__pyx_v_sentence_no = NULL; + PyObject *__pyx_v_total_words = NULL; + uint64_t __pyx_v_key; + __pyx_t_7preshed_7counter_count_t __pyx_v_count; + PyObject *__pyx_v_sentence = NULL; + PyObject *__pyx_v_word = NULL; + PyObject *__pyx_v_vocab = 
NULL; + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + Py_ssize_t __pyx_t_3; + PyObject *(*__pyx_t_4)(PyObject *); + PyObject *__pyx_t_5 = NULL; + PyObject *__pyx_t_6 = NULL; + int __pyx_t_7; + Py_ssize_t __pyx_t_8; + PyObject *__pyx_t_9 = NULL; + PyObject *__pyx_t_10 = NULL; + PyObject *__pyx_t_11 = NULL; + PyObject *(*__pyx_t_12)(PyObject *); + int __pyx_t_13; + uint64_t __pyx_t_14; + int __pyx_t_15; + __pyx_t_7preshed_7counter_count_t __pyx_t_16; + PyObject *(*__pyx_t_17)(PyObject *); + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("count_words_fast", 0); + + /* "gensim/models/count_words_inner.pyx":35 + * + * def count_words_fast(sentences, count_t min_freq, int progress_per, log_progress): + * cdef PreshCounter counts = PreshCounter() # <<<<<<<<<<<<<< + * strings = {} + * sentence_no = -1 + */ + __pyx_t_1 = __Pyx_PyObject_Call(((PyObject *)__pyx_ptype_7preshed_7counter_PreshCounter), __pyx_empty_tuple, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 35; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_v_counts = ((struct __pyx_obj_7preshed_7counter_PreshCounter *)__pyx_t_1); + __pyx_t_1 = 0; + + /* "gensim/models/count_words_inner.pyx":36 + * def count_words_fast(sentences, count_t min_freq, int progress_per, log_progress): + * cdef PreshCounter counts = PreshCounter() + * strings = {} # <<<<<<<<<<<<<< + * sentence_no = -1 + * total_words = 0 + */ + __pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 36; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_v_strings = ((PyObject*)__pyx_t_1); + __pyx_t_1 = 0; + + /* "gensim/models/count_words_inner.pyx":37 + * cdef PreshCounter counts = PreshCounter() + * strings = {} + * sentence_no = -1 # <<<<<<<<<<<<<< + * total_words = 0 + * cdef 
uint64_t key + */ + __Pyx_INCREF(__pyx_int_neg_1); + __pyx_v_sentence_no = __pyx_int_neg_1; + + /* "gensim/models/count_words_inner.pyx":38 + * strings = {} + * sentence_no = -1 + * total_words = 0 # <<<<<<<<<<<<<< + * cdef uint64_t key + * cdef count_t count + */ + __Pyx_INCREF(__pyx_int_0); + __pyx_v_total_words = __pyx_int_0; + + /* "gensim/models/count_words_inner.pyx":41 + * cdef uint64_t key + * cdef count_t count + * for sentence_no, sentence in enumerate(sentences): # <<<<<<<<<<<<<< + * if sentence_no % progress_per == 0: + * log_progress(sentence_no, total_words, len(strings)) + */ + __Pyx_INCREF(__pyx_int_0); + __pyx_t_1 = __pyx_int_0; + if (likely(PyList_CheckExact(__pyx_v_sentences)) || PyTuple_CheckExact(__pyx_v_sentences)) { + __pyx_t_2 = __pyx_v_sentences; __Pyx_INCREF(__pyx_t_2); __pyx_t_3 = 0; + __pyx_t_4 = NULL; + } else { + __pyx_t_3 = -1; __pyx_t_2 = PyObject_GetIter(__pyx_v_sentences); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 41; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); + __pyx_t_4 = Py_TYPE(__pyx_t_2)->tp_iternext; if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 41; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } + for (;;) { + if (likely(!__pyx_t_4)) { + if (likely(PyList_CheckExact(__pyx_t_2))) { + if (__pyx_t_3 >= PyList_GET_SIZE(__pyx_t_2)) break; + #if CYTHON_COMPILING_IN_CPYTHON + __pyx_t_5 = PyList_GET_ITEM(__pyx_t_2, __pyx_t_3); __Pyx_INCREF(__pyx_t_5); __pyx_t_3++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 41; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + #else + __pyx_t_5 = PySequence_ITEM(__pyx_t_2, __pyx_t_3); __pyx_t_3++; if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 41; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_5); + #endif + } else { + if (__pyx_t_3 >= PyTuple_GET_SIZE(__pyx_t_2)) break; + #if CYTHON_COMPILING_IN_CPYTHON + __pyx_t_5 = 
PyTuple_GET_ITEM(__pyx_t_2, __pyx_t_3); __Pyx_INCREF(__pyx_t_5); __pyx_t_3++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 41; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + #else + __pyx_t_5 = PySequence_ITEM(__pyx_t_2, __pyx_t_3); __pyx_t_3++; if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 41; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_5); + #endif + } + } else { + __pyx_t_5 = __pyx_t_4(__pyx_t_2); + if (unlikely(!__pyx_t_5)) { + PyObject* exc_type = PyErr_Occurred(); + if (exc_type) { + if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); + else {__pyx_filename = __pyx_f[0]; __pyx_lineno = 41; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } + break; + } + __Pyx_GOTREF(__pyx_t_5); + } + __Pyx_XDECREF_SET(__pyx_v_sentence, __pyx_t_5); + __pyx_t_5 = 0; + __Pyx_INCREF(__pyx_t_1); + __Pyx_DECREF_SET(__pyx_v_sentence_no, __pyx_t_1); + __pyx_t_5 = __Pyx_PyInt_AddObjC(__pyx_t_1, __pyx_int_1, 1, 0); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 41; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_5); + __Pyx_DECREF(__pyx_t_1); + __pyx_t_1 = __pyx_t_5; + __pyx_t_5 = 0; + + /* "gensim/models/count_words_inner.pyx":42 + * cdef count_t count + * for sentence_no, sentence in enumerate(sentences): + * if sentence_no % progress_per == 0: # <<<<<<<<<<<<<< + * log_progress(sentence_no, total_words, len(strings)) + * + */ + __pyx_t_5 = __Pyx_PyInt_From_int(__pyx_v_progress_per); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 42; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_6 = PyNumber_Remainder(__pyx_v_sentence_no, __pyx_t_5); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 42; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + 
__pyx_t_5 = __Pyx_PyInt_EqObjC(__pyx_t_6, __pyx_int_0, 0, 0); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 42; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_5); + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __pyx_t_7 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_7 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 42; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + if (__pyx_t_7) { + + /* "gensim/models/count_words_inner.pyx":43 + * for sentence_no, sentence in enumerate(sentences): + * if sentence_no % progress_per == 0: + * log_progress(sentence_no, total_words, len(strings)) # <<<<<<<<<<<<<< + * + * for word in sentence: + */ + __pyx_t_8 = PyDict_Size(__pyx_v_strings); if (unlikely(__pyx_t_8 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 43; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyInt_FromSsize_t(__pyx_t_8); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 43; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); + __Pyx_INCREF(__pyx_v_log_progress); + __pyx_t_9 = __pyx_v_log_progress; __pyx_t_10 = NULL; + __pyx_t_8 = 0; + if (CYTHON_COMPILING_IN_CPYTHON && unlikely(PyMethod_Check(__pyx_t_9))) { + __pyx_t_10 = PyMethod_GET_SELF(__pyx_t_9); + if (likely(__pyx_t_10)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_9); + __Pyx_INCREF(__pyx_t_10); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_9, function); + __pyx_t_8 = 1; + } + } + __pyx_t_11 = PyTuple_New(3+__pyx_t_8); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 43; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_11); + if (__pyx_t_10) { + __Pyx_GIVEREF(__pyx_t_10); PyTuple_SET_ITEM(__pyx_t_11, 0, __pyx_t_10); __pyx_t_10 = NULL; + } + __Pyx_INCREF(__pyx_v_sentence_no); + __Pyx_GIVEREF(__pyx_v_sentence_no); + PyTuple_SET_ITEM(__pyx_t_11, 0+__pyx_t_8, __pyx_v_sentence_no); + 
__Pyx_INCREF(__pyx_v_total_words); + __Pyx_GIVEREF(__pyx_v_total_words); + PyTuple_SET_ITEM(__pyx_t_11, 1+__pyx_t_8, __pyx_v_total_words); + __Pyx_GIVEREF(__pyx_t_6); + PyTuple_SET_ITEM(__pyx_t_11, 2+__pyx_t_8, __pyx_t_6); + __pyx_t_6 = 0; + __pyx_t_5 = __Pyx_PyObject_Call(__pyx_t_9, __pyx_t_11, NULL); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 43; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_5); + __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; + __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + + /* "gensim/models/count_words_inner.pyx":42 + * cdef count_t count + * for sentence_no, sentence in enumerate(sentences): + * if sentence_no % progress_per == 0: # <<<<<<<<<<<<<< + * log_progress(sentence_no, total_words, len(strings)) + * + */ + } + + /* "gensim/models/count_words_inner.pyx":45 + * log_progress(sentence_no, total_words, len(strings)) + * + * for word in sentence: # <<<<<<<<<<<<<< + * # There's a likely bug here: we're going to be maintaining separate + * # counts for unicode and byte strings, where defaultdict presumably + */ + if (likely(PyList_CheckExact(__pyx_v_sentence)) || PyTuple_CheckExact(__pyx_v_sentence)) { + __pyx_t_5 = __pyx_v_sentence; __Pyx_INCREF(__pyx_t_5); __pyx_t_8 = 0; + __pyx_t_12 = NULL; + } else { + __pyx_t_8 = -1; __pyx_t_5 = PyObject_GetIter(__pyx_v_sentence); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 45; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_12 = Py_TYPE(__pyx_t_5)->tp_iternext; if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 45; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } + for (;;) { + if (likely(!__pyx_t_12)) { + if (likely(PyList_CheckExact(__pyx_t_5))) { + if (__pyx_t_8 >= PyList_GET_SIZE(__pyx_t_5)) break; + #if CYTHON_COMPILING_IN_CPYTHON + __pyx_t_9 = PyList_GET_ITEM(__pyx_t_5, __pyx_t_8); __Pyx_INCREF(__pyx_t_9); __pyx_t_8++; if 
(unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 45; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + #else + __pyx_t_9 = PySequence_ITEM(__pyx_t_5, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 45; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_9); + #endif + } else { + if (__pyx_t_8 >= PyTuple_GET_SIZE(__pyx_t_5)) break; + #if CYTHON_COMPILING_IN_CPYTHON + __pyx_t_9 = PyTuple_GET_ITEM(__pyx_t_5, __pyx_t_8); __Pyx_INCREF(__pyx_t_9); __pyx_t_8++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 45; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + #else + __pyx_t_9 = PySequence_ITEM(__pyx_t_5, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 45; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_9); + #endif + } + } else { + __pyx_t_9 = __pyx_t_12(__pyx_t_5); + if (unlikely(!__pyx_t_9)) { + PyObject* exc_type = PyErr_Occurred(); + if (exc_type) { + if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); + else {__pyx_filename = __pyx_f[0]; __pyx_lineno = 45; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } + break; + } + __Pyx_GOTREF(__pyx_t_9); + } + __Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_9); + __pyx_t_9 = 0; + + /* "gensim/models/count_words_inner.pyx":53 + * # implications are pretty bad. It might be best to merge the counts + * # when we form up the final vocab. + * if isinstance(word, unicode): # <<<<<<<<<<<<<< + * key = _hash_string(word) + * elif isinstance(word, bytes): + */ + __pyx_t_7 = PyUnicode_Check(__pyx_v_word); + __pyx_t_13 = (__pyx_t_7 != 0); + if (__pyx_t_13) { + + /* "gensim/models/count_words_inner.pyx":54 + * # when we form up the final vocab. 
+ * if isinstance(word, unicode): + * key = _hash_string(word) # <<<<<<<<<<<<<< + * elif isinstance(word, bytes): + * key = _hash_bytes(word) + */ + if (!(likely(PyUnicode_CheckExact(__pyx_v_word))||((__pyx_v_word) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "unicode", Py_TYPE(__pyx_v_word)->tp_name), 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 54; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = __pyx_f_6gensim_6models_17count_words_inner__hash_string(((PyObject*)__pyx_v_word), 0); if (unlikely(__pyx_t_14 == 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 54; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v_key = __pyx_t_14; + + /* "gensim/models/count_words_inner.pyx":53 + * # implications are pretty bad. It might be best to merge the counts + * # when we form up the final vocab. + * if isinstance(word, unicode): # <<<<<<<<<<<<<< + * key = _hash_string(word) + * elif isinstance(word, bytes): + */ + goto __pyx_L8; + } + + /* "gensim/models/count_words_inner.pyx":55 + * if isinstance(word, unicode): + * key = _hash_string(word) + * elif isinstance(word, bytes): # <<<<<<<<<<<<<< + * key = _hash_bytes(word) + * else: + */ + __pyx_t_13 = PyBytes_Check(__pyx_v_word); + __pyx_t_7 = (__pyx_t_13 != 0); + if (__pyx_t_7) { + + /* "gensim/models/count_words_inner.pyx":56 + * key = _hash_string(word) + * elif isinstance(word, bytes): + * key = _hash_bytes(word) # <<<<<<<<<<<<<< + * else: + * raise TypeError(type(word)) + */ + if (!(likely(PyBytes_CheckExact(__pyx_v_word))||((__pyx_v_word) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "bytes", Py_TYPE(__pyx_v_word)->tp_name), 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 56; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = __pyx_f_6gensim_6models_17count_words_inner__hash_bytes(((PyObject*)__pyx_v_word), 0); if (unlikely(__pyx_t_14 == 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 56; __pyx_clineno = __LINE__; goto 
__pyx_L1_error;} + __pyx_v_key = __pyx_t_14; + + /* "gensim/models/count_words_inner.pyx":55 + * if isinstance(word, unicode): + * key = _hash_string(word) + * elif isinstance(word, bytes): # <<<<<<<<<<<<<< + * key = _hash_bytes(word) + * else: + */ + goto __pyx_L8; + } + + /* "gensim/models/count_words_inner.pyx":58 + * key = _hash_bytes(word) + * else: + * raise TypeError(type(word)) # <<<<<<<<<<<<<< + * counts.inc(key, 1) + * # TODO: Why doesn't .inc return this? =/ + */ + /*else*/ { + __pyx_t_9 = PyTuple_New(1); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 58; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_9); + __Pyx_INCREF(((PyObject *)Py_TYPE(__pyx_v_word))); + __Pyx_GIVEREF(((PyObject *)Py_TYPE(__pyx_v_word))); + PyTuple_SET_ITEM(__pyx_t_9, 0, ((PyObject *)Py_TYPE(__pyx_v_word))); + __pyx_t_11 = __Pyx_PyObject_Call(__pyx_builtin_TypeError, __pyx_t_9, NULL); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 58; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_11); + __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; + __Pyx_Raise(__pyx_t_11, 0, 0, 0); + __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; + {__pyx_filename = __pyx_f[0]; __pyx_lineno = 58; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } + __pyx_L8:; + + /* "gensim/models/count_words_inner.pyx":59 + * else: + * raise TypeError(type(word)) + * counts.inc(key, 1) # <<<<<<<<<<<<<< + * # TODO: Why doesn't .inc return this? =/ + * count = counts[key] + */ + __pyx_t_15 = ((struct __pyx_vtabstruct_7preshed_7counter_PreshCounter *)__pyx_v_counts->__pyx_vtab)->inc(__pyx_v_counts, __pyx_v_key, 1, 0); if (unlikely(__pyx_t_15 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + + /* "gensim/models/count_words_inner.pyx":61 + * counts.inc(key, 1) + * # TODO: Why doesn't .inc return this? 
=/ + * count = counts[key] # <<<<<<<<<<<<<< + * # Remember the string when we exceed min count + * if count == min_freq: + */ + __pyx_t_11 = __Pyx_GetItemInt(((PyObject *)__pyx_v_counts), __pyx_v_key, uint64_t, 0, __Pyx_PyInt_From_uint64_t, 0, 0, 1); if (unlikely(__pyx_t_11 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 61; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __Pyx_GOTREF(__pyx_t_11); + __pyx_t_16 = __Pyx_PyInt_As_int64_t(__pyx_t_11); if (unlikely((__pyx_t_16 == (int64_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 61; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; + __pyx_v_count = __pyx_t_16; + + /* "gensim/models/count_words_inner.pyx":63 + * count = counts[key] + * # Remember the string when we exceed min count + * if count == min_freq: # <<<<<<<<<<<<<< + * strings[key] = word + * total_words += len(sentence) + */ + __pyx_t_7 = ((__pyx_v_count == __pyx_v_min_freq) != 0); + if (__pyx_t_7) { + + /* "gensim/models/count_words_inner.pyx":64 + * # Remember the string when we exceed min count + * if count == min_freq: + * strings[key] = word # <<<<<<<<<<<<<< + * total_words += len(sentence) + * + */ + __pyx_t_11 = __Pyx_PyInt_From_uint64_t(__pyx_v_key); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 64; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_11); + if (unlikely(PyDict_SetItem(__pyx_v_strings, __pyx_t_11, __pyx_v_word) < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 64; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; + + /* "gensim/models/count_words_inner.pyx":63 + * count = counts[key] + * # Remember the string when we exceed min count + * if count == min_freq: # <<<<<<<<<<<<<< + * strings[key] = word + * total_words += len(sentence) + */ + } + + /* "gensim/models/count_words_inner.pyx":45 + * log_progress(sentence_no, total_words, len(strings)) + * + * for word in 
sentence: # <<<<<<<<<<<<<< + * # There's a likely bug here: we're going to be maintaining separate + * # counts for unicode and byte strings, where defaultdict presumably + */ + } + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + + /* "gensim/models/count_words_inner.pyx":65 + * if count == min_freq: + * strings[key] = word + * total_words += len(sentence) # <<<<<<<<<<<<<< + * + * # Use defaultdict to match the pure Python version of the function + */ + __pyx_t_8 = PyObject_Length(__pyx_v_sentence); if (unlikely(__pyx_t_8 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 65; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyInt_FromSsize_t(__pyx_t_8); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 65; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_11 = PyNumber_InPlaceAdd(__pyx_v_total_words, __pyx_t_5); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 65; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_11); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + __Pyx_DECREF_SET(__pyx_v_total_words, __pyx_t_11); + __pyx_t_11 = 0; + + /* "gensim/models/count_words_inner.pyx":41 + * cdef uint64_t key + * cdef count_t count + * for sentence_no, sentence in enumerate(sentences): # <<<<<<<<<<<<<< + * if sentence_no % progress_per == 0: + * log_progress(sentence_no, total_words, len(strings)) + */ + } + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + + /* "gensim/models/count_words_inner.pyx":68 + * + * # Use defaultdict to match the pure Python version of the function + * vocab = defaultdict(int) # <<<<<<<<<<<<<< + * for key, word in iteritems(strings): + * vocab[word] = counts[key] + */ + __pyx_t_2 = __Pyx_GetModuleGlobalName(__pyx_n_s_defaultdict); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); + __pyx_t_11 = NULL; + if 
(CYTHON_COMPILING_IN_CPYTHON && unlikely(PyMethod_Check(__pyx_t_2))) { + __pyx_t_11 = PyMethod_GET_SELF(__pyx_t_2); + if (likely(__pyx_t_11)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_2); + __Pyx_INCREF(__pyx_t_11); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_2, function); + } + } + if (!__pyx_t_11) { + __pyx_t_1 = __Pyx_PyObject_CallOneArg(__pyx_t_2, ((PyObject *)(&PyInt_Type))); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + } else { + __pyx_t_5 = PyTuple_New(1+1); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_5); + __Pyx_GIVEREF(__pyx_t_11); PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_t_11); __pyx_t_11 = NULL; + __Pyx_INCREF(((PyObject *)(&PyInt_Type))); + __Pyx_GIVEREF(((PyObject *)(&PyInt_Type))); + PyTuple_SET_ITEM(__pyx_t_5, 0+1, ((PyObject *)(&PyInt_Type))); + __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_2, __pyx_t_5, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + } + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __pyx_v_vocab = __pyx_t_1; + __pyx_t_1 = 0; + + /* "gensim/models/count_words_inner.pyx":69 + * # Use defaultdict to match the pure Python version of the function + * vocab = defaultdict(int) + * for key, word in iteritems(strings): # <<<<<<<<<<<<<< + * vocab[word] = counts[key] + * return vocab, sentence_no + */ + __pyx_t_2 = __Pyx_GetModuleGlobalName(__pyx_n_s_iteritems); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); + __pyx_t_5 = NULL; + if (CYTHON_COMPILING_IN_CPYTHON && unlikely(PyMethod_Check(__pyx_t_2))) { + __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_2); + if (likely(__pyx_t_5)) { + 
PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_2); + __Pyx_INCREF(__pyx_t_5); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_2, function); + } + } + if (!__pyx_t_5) { + __pyx_t_1 = __Pyx_PyObject_CallOneArg(__pyx_t_2, __pyx_v_strings); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + } else { + __pyx_t_11 = PyTuple_New(1+1); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_11); + __Pyx_GIVEREF(__pyx_t_5); PyTuple_SET_ITEM(__pyx_t_11, 0, __pyx_t_5); __pyx_t_5 = NULL; + __Pyx_INCREF(__pyx_v_strings); + __Pyx_GIVEREF(__pyx_v_strings); + PyTuple_SET_ITEM(__pyx_t_11, 0+1, __pyx_v_strings); + __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_2, __pyx_t_11, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; + } + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + if (likely(PyList_CheckExact(__pyx_t_1)) || PyTuple_CheckExact(__pyx_t_1)) { + __pyx_t_2 = __pyx_t_1; __Pyx_INCREF(__pyx_t_2); __pyx_t_3 = 0; + __pyx_t_4 = NULL; + } else { + __pyx_t_3 = -1; __pyx_t_2 = PyObject_GetIter(__pyx_t_1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); + __pyx_t_4 = Py_TYPE(__pyx_t_2)->tp_iternext; if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + for (;;) { + if (likely(!__pyx_t_4)) { + if (likely(PyList_CheckExact(__pyx_t_2))) { + if (__pyx_t_3 >= PyList_GET_SIZE(__pyx_t_2)) break; + #if CYTHON_COMPILING_IN_CPYTHON + __pyx_t_1 = PyList_GET_ITEM(__pyx_t_2, __pyx_t_3); __Pyx_INCREF(__pyx_t_1); __pyx_t_3++; if (unlikely(0 < 0)) 
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + #else + __pyx_t_1 = PySequence_ITEM(__pyx_t_2, __pyx_t_3); __pyx_t_3++; if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + #endif + } else { + if (__pyx_t_3 >= PyTuple_GET_SIZE(__pyx_t_2)) break; + #if CYTHON_COMPILING_IN_CPYTHON + __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_2, __pyx_t_3); __Pyx_INCREF(__pyx_t_1); __pyx_t_3++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + #else + __pyx_t_1 = PySequence_ITEM(__pyx_t_2, __pyx_t_3); __pyx_t_3++; if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + #endif + } + } else { + __pyx_t_1 = __pyx_t_4(__pyx_t_2); + if (unlikely(!__pyx_t_1)) { + PyObject* exc_type = PyErr_Occurred(); + if (exc_type) { + if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); + else {__pyx_filename = __pyx_f[0]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } + break; + } + __Pyx_GOTREF(__pyx_t_1); + } + if ((likely(PyTuple_CheckExact(__pyx_t_1))) || (PyList_CheckExact(__pyx_t_1))) { + PyObject* sequence = __pyx_t_1; + #if CYTHON_COMPILING_IN_CPYTHON + Py_ssize_t size = Py_SIZE(sequence); + #else + Py_ssize_t size = PySequence_Size(sequence); + #endif + if (unlikely(size != 2)) { + if (size > 2) __Pyx_RaiseTooManyValuesError(2); + else if (size >= 0) __Pyx_RaiseNeedMoreValuesError(size); + {__pyx_filename = __pyx_f[0]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } + #if CYTHON_COMPILING_IN_CPYTHON + if (likely(PyTuple_CheckExact(sequence))) { + __pyx_t_11 = PyTuple_GET_ITEM(sequence, 0); + __pyx_t_5 = PyTuple_GET_ITEM(sequence, 1); + } else { + __pyx_t_11 = 
PyList_GET_ITEM(sequence, 0); + __pyx_t_5 = PyList_GET_ITEM(sequence, 1); + } + __Pyx_INCREF(__pyx_t_11); + __Pyx_INCREF(__pyx_t_5); + #else + __pyx_t_11 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_11); + __pyx_t_5 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_5); + #endif + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + } else { + Py_ssize_t index = -1; + __pyx_t_9 = PyObject_GetIter(__pyx_t_1); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_9); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_17 = Py_TYPE(__pyx_t_9)->tp_iternext; + index = 0; __pyx_t_11 = __pyx_t_17(__pyx_t_9); if (unlikely(!__pyx_t_11)) goto __pyx_L12_unpacking_failed; + __Pyx_GOTREF(__pyx_t_11); + index = 1; __pyx_t_5 = __pyx_t_17(__pyx_t_9); if (unlikely(!__pyx_t_5)) goto __pyx_L12_unpacking_failed; + __Pyx_GOTREF(__pyx_t_5); + if (__Pyx_IternextUnpackEndCheck(__pyx_t_17(__pyx_t_9), 2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_17 = NULL; + __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; + goto __pyx_L13_unpacking_done; + __pyx_L12_unpacking_failed:; + __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; + __pyx_t_17 = NULL; + if (__Pyx_IterFinish() == 0) __Pyx_RaiseNeedMoreValuesError(index); + {__pyx_filename = __pyx_f[0]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_L13_unpacking_done:; + } + __pyx_t_14 = __Pyx_PyInt_As_uint64_t(__pyx_t_11); if (unlikely((__pyx_t_14 == (uint64_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; + __pyx_v_key 
= __pyx_t_14; + __Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_5); + __pyx_t_5 = 0; + + /* "gensim/models/count_words_inner.pyx":70 + * vocab = defaultdict(int) + * for key, word in iteritems(strings): + * vocab[word] = counts[key] # <<<<<<<<<<<<<< + * return vocab, sentence_no + * + */ + __pyx_t_1 = __Pyx_GetItemInt(((PyObject *)__pyx_v_counts), __pyx_v_key, uint64_t, 0, __Pyx_PyInt_From_uint64_t, 0, 0, 1); if (unlikely(__pyx_t_1 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 70; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __Pyx_GOTREF(__pyx_t_1); + if (unlikely(PyObject_SetItem(__pyx_v_vocab, __pyx_v_word, __pyx_t_1) < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 70; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + + /* "gensim/models/count_words_inner.pyx":69 + * # Use defaultdict to match the pure Python version of the function + * vocab = defaultdict(int) + * for key, word in iteritems(strings): # <<<<<<<<<<<<<< + * vocab[word] = counts[key] + * return vocab, sentence_no + */ + } + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + + /* "gensim/models/count_words_inner.pyx":71 + * for key, word in iteritems(strings): + * vocab[word] = counts[key] + * return vocab, sentence_no # <<<<<<<<<<<<<< + * + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_2 = PyTuple_New(2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 71; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); + __Pyx_INCREF(__pyx_v_vocab); + __Pyx_GIVEREF(__pyx_v_vocab); + PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_v_vocab); + __Pyx_INCREF(__pyx_v_sentence_no); + __Pyx_GIVEREF(__pyx_v_sentence_no); + PyTuple_SET_ITEM(__pyx_t_2, 1, __pyx_v_sentence_no); + __pyx_r = __pyx_t_2; + __pyx_t_2 = 0; + goto __pyx_L0; + + /* "gensim/models/count_words_inner.pyx":34 + * + * + * def count_words_fast(sentences, count_t min_freq, int progress_per, log_progress): # <<<<<<<<<<<<<< + * cdef PreshCounter counts = PreshCounter() + * strings = {} 
+ */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_2); + __Pyx_XDECREF(__pyx_t_5); + __Pyx_XDECREF(__pyx_t_6); + __Pyx_XDECREF(__pyx_t_9); + __Pyx_XDECREF(__pyx_t_10); + __Pyx_XDECREF(__pyx_t_11); + __Pyx_AddTraceback("gensim.models.count_words_inner.count_words_fast", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XDECREF((PyObject *)__pyx_v_counts); + __Pyx_XDECREF(__pyx_v_strings); + __Pyx_XDECREF(__pyx_v_sentence_no); + __Pyx_XDECREF(__pyx_v_total_words); + __Pyx_XDECREF(__pyx_v_sentence); + __Pyx_XDECREF(__pyx_v_word); + __Pyx_XDECREF(__pyx_v_vocab); + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyMethodDef __pyx_methods[] = { + {"_hash_string", (PyCFunction)__pyx_pw_6gensim_6models_17count_words_inner_1_hash_string, METH_O, 0}, + {"_hash_bytes", (PyCFunction)__pyx_pw_6gensim_6models_17count_words_inner_3_hash_bytes, METH_O, 0}, + {0, 0, 0, 0} +}; + +#if PY_MAJOR_VERSION >= 3 +static struct PyModuleDef __pyx_moduledef = { + #if PY_VERSION_HEX < 0x03020000 + { PyObject_HEAD_INIT(NULL) NULL, 0, NULL }, + #else + PyModuleDef_HEAD_INIT, + #endif + "count_words_inner", + 0, /* m_doc */ + -1, /* m_size */ + __pyx_methods /* m_methods */, + NULL, /* m_reload */ + NULL, /* m_traverse */ + NULL, /* m_clear */ + NULL /* m_free */ +}; +#endif + +static __Pyx_StringTabEntry __pyx_string_tab[] = { + {&__pyx_n_s_TypeError, __pyx_k_TypeError, sizeof(__pyx_k_TypeError), 0, 0, 1, 1}, + {&__pyx_kp_s_Users_matt_repos_gensim_gensim, __pyx_k_Users_matt_repos_gensim_gensim, sizeof(__pyx_k_Users_matt_repos_gensim_gensim), 0, 0, 1, 0}, + {&__pyx_n_s_collections, __pyx_k_collections, sizeof(__pyx_k_collections), 0, 0, 1, 1}, + {&__pyx_n_s_count, __pyx_k_count, sizeof(__pyx_k_count), 0, 0, 1, 1}, + {&__pyx_n_s_count_words_fast, __pyx_k_count_words_fast, sizeof(__pyx_k_count_words_fast), 0, 0, 1, 1}, + {&__pyx_n_s_counts, __pyx_k_counts, 
sizeof(__pyx_k_counts), 0, 0, 1, 1}, + {&__pyx_n_s_defaultdict, __pyx_k_defaultdict, sizeof(__pyx_k_defaultdict), 0, 0, 1, 1}, + {&__pyx_n_s_enumerate, __pyx_k_enumerate, sizeof(__pyx_k_enumerate), 0, 0, 1, 1}, + {&__pyx_n_s_gensim_models_count_words_inner, __pyx_k_gensim_models_count_words_inner, sizeof(__pyx_k_gensim_models_count_words_inner), 0, 0, 1, 1}, + {&__pyx_n_s_import, __pyx_k_import, sizeof(__pyx_k_import), 0, 0, 1, 1}, + {&__pyx_n_s_iteritems, __pyx_k_iteritems, sizeof(__pyx_k_iteritems), 0, 0, 1, 1}, + {&__pyx_n_s_key, __pyx_k_key, sizeof(__pyx_k_key), 0, 0, 1, 1}, + {&__pyx_n_s_log_progress, __pyx_k_log_progress, sizeof(__pyx_k_log_progress), 0, 0, 1, 1}, + {&__pyx_n_s_main, __pyx_k_main, sizeof(__pyx_k_main), 0, 0, 1, 1}, + {&__pyx_n_s_min_freq, __pyx_k_min_freq, sizeof(__pyx_k_min_freq), 0, 0, 1, 1}, + {&__pyx_n_s_progress_per, __pyx_k_progress_per, sizeof(__pyx_k_progress_per), 0, 0, 1, 1}, + {&__pyx_n_s_pyx_vtable, __pyx_k_pyx_vtable, sizeof(__pyx_k_pyx_vtable), 0, 0, 1, 1}, + {&__pyx_n_s_sentence, __pyx_k_sentence, sizeof(__pyx_k_sentence), 0, 0, 1, 1}, + {&__pyx_n_s_sentence_no, __pyx_k_sentence_no, sizeof(__pyx_k_sentence_no), 0, 0, 1, 1}, + {&__pyx_n_s_sentences, __pyx_k_sentences, sizeof(__pyx_k_sentences), 0, 0, 1, 1}, + {&__pyx_n_s_six, __pyx_k_six, sizeof(__pyx_k_six), 0, 0, 1, 1}, + {&__pyx_n_s_strings, __pyx_k_strings, sizeof(__pyx_k_strings), 0, 0, 1, 1}, + {&__pyx_n_s_test, __pyx_k_test, sizeof(__pyx_k_test), 0, 0, 1, 1}, + {&__pyx_n_s_total_words, __pyx_k_total_words, sizeof(__pyx_k_total_words), 0, 0, 1, 1}, + {&__pyx_n_s_vocab, __pyx_k_vocab, sizeof(__pyx_k_vocab), 0, 0, 1, 1}, + {&__pyx_n_s_word, __pyx_k_word, sizeof(__pyx_k_word), 0, 0, 1, 1}, + {0, 0, 0, 0, 0, 0, 0} +}; +static int __Pyx_InitCachedBuiltins(void) { + __pyx_builtin_enumerate = __Pyx_GetBuiltinName(__pyx_n_s_enumerate); if (!__pyx_builtin_enumerate) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 41; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + 
__pyx_builtin_TypeError = __Pyx_GetBuiltinName(__pyx_n_s_TypeError); if (!__pyx_builtin_TypeError) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 58; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + return 0; + __pyx_L1_error:; + return -1; +} + +static int __Pyx_InitCachedConstants(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_InitCachedConstants", 0); + + /* "gensim/models/count_words_inner.pyx":34 + * + * + * def count_words_fast(sentences, count_t min_freq, int progress_per, log_progress): # <<<<<<<<<<<<<< + * cdef PreshCounter counts = PreshCounter() + * strings = {} + */ + __pyx_tuple_ = PyTuple_Pack(13, __pyx_n_s_sentences, __pyx_n_s_min_freq, __pyx_n_s_progress_per, __pyx_n_s_log_progress, __pyx_n_s_counts, __pyx_n_s_strings, __pyx_n_s_sentence_no, __pyx_n_s_total_words, __pyx_n_s_key, __pyx_n_s_count, __pyx_n_s_sentence, __pyx_n_s_word, __pyx_n_s_vocab); if (unlikely(!__pyx_tuple_)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_tuple_); + __Pyx_GIVEREF(__pyx_tuple_); + __pyx_codeobj__2 = (PyObject*)__Pyx_PyCode_New(4, 0, 13, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple_, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_matt_repos_gensim_gensim, __pyx_n_s_count_words_fast, 34, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_RefNannyFinishContext(); + return 0; + __pyx_L1_error:; + __Pyx_RefNannyFinishContext(); + return -1; +} + +static int __Pyx_InitGlobals(void) { + if (__Pyx_InitStrings(__pyx_string_tab) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __pyx_int_0 = PyInt_FromLong(0); if (unlikely(!__pyx_int_0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_int_1 = PyInt_FromLong(1); if 
(unlikely(!__pyx_int_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_int_neg_1 = PyInt_FromLong(-1); if (unlikely(!__pyx_int_neg_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + return 0; + __pyx_L1_error:; + return -1; +} + +#if PY_MAJOR_VERSION < 3 +PyMODINIT_FUNC initcount_words_inner(void); /*proto*/ +PyMODINIT_FUNC initcount_words_inner(void) +#else +PyMODINIT_FUNC PyInit_count_words_inner(void); /*proto*/ +PyMODINIT_FUNC PyInit_count_words_inner(void) +#endif +{ + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_3 = NULL; + PyObject *__pyx_t_4 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannyDeclarations + #if CYTHON_REFNANNY + __Pyx_RefNanny = __Pyx_RefNannyImportAPI("refnanny"); + if (!__Pyx_RefNanny) { + PyErr_Clear(); + __Pyx_RefNanny = __Pyx_RefNannyImportAPI("Cython.Runtime.refnanny"); + if (!__Pyx_RefNanny) + Py_FatalError("failed to import 'refnanny' module"); + } + #endif + __Pyx_RefNannySetupContext("PyMODINIT_FUNC PyInit_count_words_inner(void)", 0); + if (__Pyx_check_binary_version() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_empty_tuple = PyTuple_New(0); if (unlikely(!__pyx_empty_tuple)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_empty_bytes = PyBytes_FromStringAndSize("", 0); if (unlikely(!__pyx_empty_bytes)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + #ifdef __Pyx_CyFunction_USED + if (__pyx_CyFunction_init() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + #endif + #ifdef __Pyx_FusedFunction_USED + if (__pyx_FusedFunction_init() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto 
__pyx_L1_error;} + #endif + #ifdef __Pyx_Coroutine_USED + if (__pyx_Coroutine_init() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + #endif + #ifdef __Pyx_Generator_USED + if (__pyx_Generator_init() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + #endif + #ifdef __Pyx_StopAsyncIteration_USED + if (__pyx_StopAsyncIteration_init() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + #endif + /*--- Library function declarations ---*/ + /*--- Threads initialization code ---*/ + #if defined(__PYX_FORCE_INIT_THREADS) && __PYX_FORCE_INIT_THREADS + #ifdef WITH_THREAD /* Python build with threading support? */ + PyEval_InitThreads(); + #endif + #endif + /*--- Module creation code ---*/ + #if PY_MAJOR_VERSION < 3 + __pyx_m = Py_InitModule4("count_words_inner", __pyx_methods, 0, 0, PYTHON_API_VERSION); Py_XINCREF(__pyx_m); + #else + __pyx_m = PyModule_Create(&__pyx_moduledef); + #endif + if (unlikely(!__pyx_m)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_d = PyModule_GetDict(__pyx_m); if (unlikely(!__pyx_d)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + Py_INCREF(__pyx_d); + __pyx_b = PyImport_AddModule(__Pyx_BUILTIN_MODULE_NAME); if (unlikely(!__pyx_b)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + #if CYTHON_COMPILING_IN_PYPY + Py_INCREF(__pyx_b); + #endif + if (PyObject_SetAttrString(__pyx_m, "__builtins__", __pyx_b) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + /*--- Initialize various global constants etc. 
---*/ + if (__Pyx_InitGlobals() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + #if PY_MAJOR_VERSION < 3 && (__PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT) + if (__Pyx_init_sys_getdefaultencoding_params() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + #endif + if (__pyx_module_is_main_gensim__models__count_words_inner) { + if (PyObject_SetAttrString(__pyx_m, "__name__", __pyx_n_s_main) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } + #if PY_MAJOR_VERSION >= 3 + { + PyObject *modules = PyImport_GetModuleDict(); if (unlikely(!modules)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!PyDict_GetItemString(modules, "gensim.models.count_words_inner")) { + if (unlikely(PyDict_SetItemString(modules, "gensim.models.count_words_inner", __pyx_m) < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } + } + #endif + /*--- Builtin init code ---*/ + if (__Pyx_InitCachedBuiltins() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + /*--- Constants init code ---*/ + if (__Pyx_InitCachedConstants() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + /*--- Global init code ---*/ + /*--- Variable export code ---*/ + /*--- Function export code ---*/ + /*--- Type init code ---*/ + /*--- Type import code ---*/ + __pyx_ptype_7cpython_4type_type = __Pyx_ImportType(__Pyx_BUILTIN_MODULE_NAME, "type", + #if CYTHON_COMPILING_IN_PYPY + sizeof(PyTypeObject), + #else + sizeof(PyHeapTypeObject), + #endif + 0); if (unlikely(!__pyx_ptype_7cpython_4type_type)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 9; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_ptype_7cpython_4bool_bool = 
__Pyx_ImportType(__Pyx_BUILTIN_MODULE_NAME, "bool", sizeof(PyBoolObject), 0); if (unlikely(!__pyx_ptype_7cpython_4bool_bool)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 8; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_ptype_7cpython_7complex_complex = __Pyx_ImportType(__Pyx_BUILTIN_MODULE_NAME, "complex", sizeof(PyComplexObject), 0); if (unlikely(!__pyx_ptype_7cpython_7complex_complex)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 15; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_ptype_5cymem_5cymem_Pool = __Pyx_ImportType("cymem.cymem", "Pool", sizeof(struct __pyx_obj_5cymem_5cymem_Pool), 1); if (unlikely(!__pyx_ptype_5cymem_5cymem_Pool)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_vtabptr_5cymem_5cymem_Pool = (struct __pyx_vtabstruct_5cymem_5cymem_Pool*)__Pyx_GetVtable(__pyx_ptype_5cymem_5cymem_Pool->tp_dict); if (unlikely(!__pyx_vtabptr_5cymem_5cymem_Pool)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_ptype_5cymem_5cymem_Address = __Pyx_ImportType("cymem.cymem", "Address", sizeof(struct __pyx_obj_5cymem_5cymem_Address), 1); if (unlikely(!__pyx_ptype_5cymem_5cymem_Address)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 11; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_ptype_7preshed_4maps_PreshMap = __Pyx_ImportType("preshed.maps", "PreshMap", sizeof(struct __pyx_obj_7preshed_4maps_PreshMap), 1); if (unlikely(!__pyx_ptype_7preshed_4maps_PreshMap)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 36; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_vtabptr_7preshed_4maps_PreshMap = (struct __pyx_vtabstruct_7preshed_4maps_PreshMap*)__Pyx_GetVtable(__pyx_ptype_7preshed_4maps_PreshMap->tp_dict); if (unlikely(!__pyx_vtabptr_7preshed_4maps_PreshMap)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 36; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_ptype_7preshed_4maps_PreshMapArray = 
__Pyx_ImportType("preshed.maps", "PreshMapArray", sizeof(struct __pyx_obj_7preshed_4maps_PreshMapArray), 1); if (unlikely(!__pyx_ptype_7preshed_4maps_PreshMapArray)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 44; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_vtabptr_7preshed_4maps_PreshMapArray = (struct __pyx_vtabstruct_7preshed_4maps_PreshMapArray*)__Pyx_GetVtable(__pyx_ptype_7preshed_4maps_PreshMapArray->tp_dict); if (unlikely(!__pyx_vtabptr_7preshed_4maps_PreshMapArray)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 44; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_ptype_7preshed_7counter_PreshCounter = __Pyx_ImportType("preshed.counter", "PreshCounter", sizeof(struct __pyx_obj_7preshed_7counter_PreshCounter), 1); if (unlikely(!__pyx_ptype_7preshed_7counter_PreshCounter)) {__pyx_filename = __pyx_f[6]; __pyx_lineno = 13; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_vtabptr_7preshed_7counter_PreshCounter = (struct __pyx_vtabstruct_7preshed_7counter_PreshCounter*)__Pyx_GetVtable(__pyx_ptype_7preshed_7counter_PreshCounter->tp_dict); if (unlikely(!__pyx_vtabptr_7preshed_7counter_PreshCounter)) {__pyx_filename = __pyx_f[6]; __pyx_lineno = 13; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + /*--- Variable import code ---*/ + /*--- Function import code ---*/ + __pyx_t_1 = __Pyx_ImportModule("murmurhash.mrmr"); if (!__pyx_t_1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (__Pyx_ImportFunction(__pyx_t_1, "hash64", (void (**)(void))&__pyx_f_10murmurhash_4mrmr_hash64, "uint64_t (void *, int, uint64_t)") < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + Py_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_2 = __Pyx_ImportModule("preshed.maps"); if (!__pyx_t_2) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (__Pyx_ImportFunction(__pyx_t_2, "map_get", (void 
(**)(void))&__pyx_f_7preshed_4maps_map_get, "void *(struct __pyx_t_7preshed_4maps_MapStruct const *, __pyx_t_7preshed_4maps_key_t const )") < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (__Pyx_ImportFunction(__pyx_t_2, "map_set", (void (**)(void))&__pyx_f_7preshed_4maps_map_set, "void (struct __pyx_obj_5cymem_5cymem_Pool *, struct __pyx_t_7preshed_4maps_MapStruct *, __pyx_t_7preshed_4maps_key_t, void *)") < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (__Pyx_ImportFunction(__pyx_t_2, "map_init", (void (**)(void))&__pyx_f_7preshed_4maps_map_init, "void (struct __pyx_obj_5cymem_5cymem_Pool *, struct __pyx_t_7preshed_4maps_MapStruct *, size_t)") < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + Py_DECREF(__pyx_t_2); __pyx_t_2 = 0; + /*--- Execution code ---*/ + #if defined(__Pyx_Generator_USED) || defined(__Pyx_Coroutine_USED) + if (__Pyx_patch_abc() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + #endif + + /* "gensim/models/count_words_inner.pyx":16 + * from preshed.counter cimport PreshCounter, count_t + * + * from collections import defaultdict # <<<<<<<<<<<<<< + * from six import iteritems + * + */ + __pyx_t_3 = PyList_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 16; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __Pyx_INCREF(__pyx_n_s_defaultdict); + __Pyx_GIVEREF(__pyx_n_s_defaultdict); + PyList_SET_ITEM(__pyx_t_3, 0, __pyx_n_s_defaultdict); + __pyx_t_4 = __Pyx_Import(__pyx_n_s_collections, __pyx_t_3, -1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 16; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_4); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_3 = __Pyx_ImportFrom(__pyx_t_4, __pyx_n_s_defaultdict); if 
(unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 16; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_defaultdict, __pyx_t_3) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 16; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + + /* "gensim/models/count_words_inner.pyx":17 + * + * from collections import defaultdict + * from six import iteritems # <<<<<<<<<<<<<< + * + * + */ + __pyx_t_4 = PyList_New(1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_4); + __Pyx_INCREF(__pyx_n_s_iteritems); + __Pyx_GIVEREF(__pyx_n_s_iteritems); + PyList_SET_ITEM(__pyx_t_4, 0, __pyx_n_s_iteritems); + __pyx_t_3 = __Pyx_Import(__pyx_n_s_six, __pyx_t_4, -1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __pyx_t_4 = __Pyx_ImportFrom(__pyx_t_3, __pyx_n_s_iteritems); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_4); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_iteritems, __pyx_t_4) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + + /* "gensim/models/count_words_inner.pyx":34 + * + * + * def count_words_fast(sentences, count_t min_freq, int progress_per, log_progress): # <<<<<<<<<<<<<< + * cdef PreshCounter counts = PreshCounter() + * strings = {} + */ + __pyx_t_3 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_17count_words_inner_5count_words_fast, NULL, __pyx_n_s_gensim_models_count_words_inner); if (unlikely(!__pyx_t_3)) {__pyx_filename = 
#if CYTHON_REFNANNY
/*
 * Import module `modname`, read its "RefNannyAPI" attribute and convert it
 * to a raw pointer with PyLong_AsVoidPtr().
 *
 * Returns the API struct pointer, or NULL if the import or the attribute
 * lookup fails (the Python error set by the failing call is left in place).
 */
static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname) {
    void *api = NULL;
    PyObject *api_obj = NULL;
    PyObject *module = PyImport_ImportModule((char *)modname);
    if (module) {
        api_obj = PyObject_GetAttrString(module, (char *)"RefNannyAPI");
        if (api_obj) {
            api = PyLong_AsVoidPtr(api_obj);
        }
    }
    /* Both temporaries are released on every path, success or failure. */
    Py_XDECREF(api_obj);
    Py_XDECREF(module);
    return (__Pyx_RefNannyAPIStruct *)api;
}
#endif
if (unlikely(!result)) { + PyErr_Format(PyExc_NameError, +#if PY_MAJOR_VERSION >= 3 + "name '%U' is not defined", name); +#else + "name '%.200s' is not defined", PyString_AS_STRING(name)); +#endif + } + return result; +} + +static void __Pyx_RaiseArgumentTypeInvalid(const char* name, PyObject *obj, PyTypeObject *type) { + PyErr_Format(PyExc_TypeError, + "Argument '%.200s' has incorrect type (expected %.200s, got %.200s)", + name, type->tp_name, Py_TYPE(obj)->tp_name); +} +static CYTHON_INLINE int __Pyx_ArgTypeTest(PyObject *obj, PyTypeObject *type, int none_allowed, + const char *name, int exact) +{ + if (unlikely(!type)) { + PyErr_SetString(PyExc_SystemError, "Missing type object"); + return 0; + } + if (none_allowed && obj == Py_None) return 1; + else if (exact) { + if (likely(Py_TYPE(obj) == type)) return 1; + #if PY_MAJOR_VERSION == 2 + else if ((type == &PyBaseString_Type) && likely(__Pyx_PyBaseString_CheckExact(obj))) return 1; + #endif + } + else { + if (likely(PyObject_TypeCheck(obj, type))) return 1; + } + __Pyx_RaiseArgumentTypeInvalid(name, obj, type); + return 0; +} + +static void __Pyx_RaiseArgtupleInvalid( + const char* func_name, + int exact, + Py_ssize_t num_min, + Py_ssize_t num_max, + Py_ssize_t num_found) +{ + Py_ssize_t num_expected; + const char *more_or_less; + if (num_found < num_min) { + num_expected = num_min; + more_or_less = "at least"; + } else { + num_expected = num_max; + more_or_less = "at most"; + } + if (exact) { + more_or_less = "exactly"; + } + PyErr_Format(PyExc_TypeError, + "%.200s() takes %.8s %" CYTHON_FORMAT_SSIZE_T "d positional argument%.1s (%" CYTHON_FORMAT_SSIZE_T "d given)", + func_name, more_or_less, num_expected, + (num_expected == 1) ? 
"" : "s", num_found); +} + +static void __Pyx_RaiseDoubleKeywordsError( + const char* func_name, + PyObject* kw_name) +{ + PyErr_Format(PyExc_TypeError, + #if PY_MAJOR_VERSION >= 3 + "%s() got multiple values for keyword argument '%U'", func_name, kw_name); + #else + "%s() got multiple values for keyword argument '%s'", func_name, + PyString_AsString(kw_name)); + #endif +} + +static int __Pyx_ParseOptionalKeywords( + PyObject *kwds, + PyObject **argnames[], + PyObject *kwds2, + PyObject *values[], + Py_ssize_t num_pos_args, + const char* function_name) +{ + PyObject *key = 0, *value = 0; + Py_ssize_t pos = 0; + PyObject*** name; + PyObject*** first_kw_arg = argnames + num_pos_args; + while (PyDict_Next(kwds, &pos, &key, &value)) { + name = first_kw_arg; + while (*name && (**name != key)) name++; + if (*name) { + values[name-argnames] = value; + continue; + } + name = first_kw_arg; + #if PY_MAJOR_VERSION < 3 + if (likely(PyString_CheckExact(key)) || likely(PyString_Check(key))) { + while (*name) { + if ((CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**name) == PyString_GET_SIZE(key)) + && _PyString_Eq(**name, key)) { + values[name-argnames] = value; + break; + } + name++; + } + if (*name) continue; + else { + PyObject*** argname = argnames; + while (argname != first_kw_arg) { + if ((**argname == key) || ( + (CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**argname) == PyString_GET_SIZE(key)) + && _PyString_Eq(**argname, key))) { + goto arg_passed_twice; + } + argname++; + } + } + } else + #endif + if (likely(PyUnicode_Check(key))) { + while (*name) { + int cmp = (**name == key) ? 0 : + #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3 + (PyUnicode_GET_SIZE(**name) != PyUnicode_GET_SIZE(key)) ? 
#if CYTHON_COMPILING_IN_CPYTHON
/*
 * Call `func` with positional tuple `arg` and keyword dict `kw` by invoking
 * the type's tp_call slot directly.
 *
 * Falls back to PyObject_Call() when the type has no tp_call slot. The slot
 * call is bracketed by Py_EnterRecursiveCall()/Py_LeaveRecursiveCall().
 * Returns a new reference or NULL with an exception set; if the slot returns
 * NULL without setting an exception, a SystemError is raised in its place.
 */
static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw) {
    PyObject *ret;
    ternaryfunc slot = func->ob_type->tp_call;
    if (unlikely(!slot)) {
        return PyObject_Call(func, arg, kw);
    }
    if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) {
        return NULL;
    }
    ret = slot(func, arg, kw);
    Py_LeaveRecursiveCall();
    if (likely(ret)) {
        return ret;
    }
    /* A NULL result must always come with an exception; enforce that. */
    if (unlikely(!PyErr_Occurred())) {
        PyErr_SetString(
            PyExc_SystemError,
            "NULL result without error in PyObject_Call");
    }
    return NULL;
}
#endif
__Pyx_PyInt_AddObjC(PyObject *op1, PyObject *op2, CYTHON_UNUSED long intval, CYTHON_UNUSED int inplace) { + #if PY_MAJOR_VERSION < 3 + if (likely(PyInt_CheckExact(op1))) { + const long b = intval; + long x; + long a = PyInt_AS_LONG(op1); + x = (long)((unsigned long)a + b); + if (likely((x^a) >= 0 || (x^b) >= 0)) + return PyInt_FromLong(x); + return PyLong_Type.tp_as_number->nb_add(op1, op2); + } + #endif + #if CYTHON_USE_PYLONG_INTERNALS && PY_MAJOR_VERSION >= 3 + if (likely(PyLong_CheckExact(op1))) { + const long b = intval; + long a, x; + const PY_LONG_LONG llb = intval; + PY_LONG_LONG lla, llx; + const digit* digits = ((PyLongObject*)op1)->ob_digit; + const Py_ssize_t size = Py_SIZE(op1); + if (likely(__Pyx_sst_abs(size) <= 1)) { + a = likely(size) ? digits[0] : 0; + if (size == -1) a = -a; + } else { + switch (size) { + case -2: + if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) { + a = -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])); + break; + } else if (8 * sizeof(PY_LONG_LONG) - 1 > 2 * PyLong_SHIFT) { + lla = -(PY_LONG_LONG) (((((unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0])); + goto long_long; + } + case 2: + if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) { + a = (long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])); + break; + } else if (8 * sizeof(PY_LONG_LONG) - 1 > 2 * PyLong_SHIFT) { + lla = (PY_LONG_LONG) (((((unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0])); + goto long_long; + } + case -3: + if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) { + a = -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])); + break; + } else if (8 * sizeof(PY_LONG_LONG) - 1 > 3 * PyLong_SHIFT) { + lla = -(PY_LONG_LONG) (((((((unsigned PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0])); 
+ goto long_long; + } + case 3: + if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) { + a = (long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])); + break; + } else if (8 * sizeof(PY_LONG_LONG) - 1 > 3 * PyLong_SHIFT) { + lla = (PY_LONG_LONG) (((((((unsigned PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0])); + goto long_long; + } + case -4: + if (8 * sizeof(long) - 1 > 4 * PyLong_SHIFT) { + a = -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])); + break; + } else if (8 * sizeof(PY_LONG_LONG) - 1 > 4 * PyLong_SHIFT) { + lla = -(PY_LONG_LONG) (((((((((unsigned PY_LONG_LONG)digits[3]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0])); + goto long_long; + } + case 4: + if (8 * sizeof(long) - 1 > 4 * PyLong_SHIFT) { + a = (long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])); + break; + } else if (8 * sizeof(PY_LONG_LONG) - 1 > 4 * PyLong_SHIFT) { + lla = (PY_LONG_LONG) (((((((((unsigned PY_LONG_LONG)digits[3]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0])); + goto long_long; + } + default: return PyLong_Type.tp_as_number->nb_add(op1, op2); + } + } + x = a + b; + return PyLong_FromLong(x); + long_long: + llx = lla + llb; + return PyLong_FromLongLong(llx); + } + #endif + if (PyFloat_CheckExact(op1)) { + const long b = intval; + double a = PyFloat_AS_DOUBLE(op1); + double result; + PyFPE_START_PROTECT("add", return NULL) + result = ((double)a) + (double)b; + 
PyFPE_END_PROTECT(result) + return PyFloat_FromDouble(result); + } + return (inplace ? PyNumber_InPlaceAdd : PyNumber_Add)(op1, op2); +} +#endif + +#if CYTHON_COMPILING_IN_CPYTHON +static PyObject* __Pyx_PyInt_EqObjC(PyObject *op1, PyObject *op2, CYTHON_UNUSED long intval, CYTHON_UNUSED int inplace) { + if (op1 == op2) { + Py_RETURN_TRUE; + } + #if PY_MAJOR_VERSION < 3 + if (likely(PyInt_CheckExact(op1))) { + const long b = intval; + long a = PyInt_AS_LONG(op1); + if (a == b) { + Py_RETURN_TRUE; + } else { + Py_RETURN_FALSE; + } + } + #endif + #if CYTHON_USE_PYLONG_INTERNALS && PY_MAJOR_VERSION >= 3 + if (likely(PyLong_CheckExact(op1))) { + const long b = intval; + long a; + const digit* digits = ((PyLongObject*)op1)->ob_digit; + const Py_ssize_t size = Py_SIZE(op1); + if (likely(__Pyx_sst_abs(size) <= 1)) { + a = likely(size) ? digits[0] : 0; + if (size == -1) a = -a; + } else { + switch (size) { + case -2: + if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) { + a = -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])); + break; + } + case 2: + if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) { + a = (long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])); + break; + } + case -3: + if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) { + a = -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])); + break; + } + case 3: + if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) { + a = (long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])); + break; + } + case -4: + if (8 * sizeof(long) - 1 > 4 * PyLong_SHIFT) { + a = -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])); + break; + } + case 4: + if (8 * sizeof(long) - 1 > 4 * PyLong_SHIFT) { + a = (long) 
(((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])); + break; + } + #if PyLong_SHIFT < 30 && PyLong_SHIFT != 15 + default: return PyLong_Type.tp_richcompare(op1, op2, Py_EQ); + #else + default: Py_RETURN_FALSE; + #endif + } + } + if (a == b) { + Py_RETURN_TRUE; + } else { + Py_RETURN_FALSE; + } + } + #endif + if (PyFloat_CheckExact(op1)) { + const long b = intval; + double a = PyFloat_AS_DOUBLE(op1); + if ((double)a == (double)b) { + Py_RETURN_TRUE; + } else { + Py_RETURN_FALSE; + } + } + return PyObject_RichCompare(op1, op2, Py_EQ); +} +#endif + +static CYTHON_INLINE void __Pyx_ErrRestore(PyObject *type, PyObject *value, PyObject *tb) { +#if CYTHON_COMPILING_IN_CPYTHON + PyObject *tmp_type, *tmp_value, *tmp_tb; + PyThreadState *tstate = PyThreadState_GET(); + tmp_type = tstate->curexc_type; + tmp_value = tstate->curexc_value; + tmp_tb = tstate->curexc_traceback; + tstate->curexc_type = type; + tstate->curexc_value = value; + tstate->curexc_traceback = tb; + Py_XDECREF(tmp_type); + Py_XDECREF(tmp_value); + Py_XDECREF(tmp_tb); +#else + PyErr_Restore(type, value, tb); +#endif +} +static CYTHON_INLINE void __Pyx_ErrFetch(PyObject **type, PyObject **value, PyObject **tb) { +#if CYTHON_COMPILING_IN_CPYTHON + PyThreadState *tstate = PyThreadState_GET(); + *type = tstate->curexc_type; + *value = tstate->curexc_value; + *tb = tstate->curexc_traceback; + tstate->curexc_type = 0; + tstate->curexc_value = 0; + tstate->curexc_traceback = 0; +#else + PyErr_Fetch(type, value, tb); +#endif +} + +#if PY_MAJOR_VERSION < 3 +static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, + CYTHON_UNUSED PyObject *cause) { + Py_XINCREF(type); + if (!value || value == Py_None) + value = NULL; + else + Py_INCREF(value); + if (!tb || tb == Py_None) + tb = NULL; + else { + Py_INCREF(tb); + if (!PyTraceBack_Check(tb)) { + PyErr_SetString(PyExc_TypeError, + "raise: 
arg 3 must be a traceback or None"); + goto raise_error; + } + } + if (PyType_Check(type)) { +#if CYTHON_COMPILING_IN_PYPY + if (!value) { + Py_INCREF(Py_None); + value = Py_None; + } +#endif + PyErr_NormalizeException(&type, &value, &tb); + } else { + if (value) { + PyErr_SetString(PyExc_TypeError, + "instance exception may not have a separate value"); + goto raise_error; + } + value = type; + type = (PyObject*) Py_TYPE(type); + Py_INCREF(type); + if (!PyType_IsSubtype((PyTypeObject *)type, (PyTypeObject *)PyExc_BaseException)) { + PyErr_SetString(PyExc_TypeError, + "raise: exception class must be a subclass of BaseException"); + goto raise_error; + } + } + __Pyx_ErrRestore(type, value, tb); + return; +raise_error: + Py_XDECREF(value); + Py_XDECREF(type); + Py_XDECREF(tb); + return; +} +#else +static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause) { + PyObject* owned_instance = NULL; + if (tb == Py_None) { + tb = 0; + } else if (tb && !PyTraceBack_Check(tb)) { + PyErr_SetString(PyExc_TypeError, + "raise: arg 3 must be a traceback or None"); + goto bad; + } + if (value == Py_None) + value = 0; + if (PyExceptionInstance_Check(type)) { + if (value) { + PyErr_SetString(PyExc_TypeError, + "instance exception may not have a separate value"); + goto bad; + } + value = type; + type = (PyObject*) Py_TYPE(value); + } else if (PyExceptionClass_Check(type)) { + PyObject *instance_class = NULL; + if (value && PyExceptionInstance_Check(value)) { + instance_class = (PyObject*) Py_TYPE(value); + if (instance_class != type) { + int is_subclass = PyObject_IsSubclass(instance_class, type); + if (!is_subclass) { + instance_class = NULL; + } else if (unlikely(is_subclass == -1)) { + goto bad; + } else { + type = instance_class; + } + } + } + if (!instance_class) { + PyObject *args; + if (!value) + args = PyTuple_New(0); + else if (PyTuple_Check(value)) { + Py_INCREF(value); + args = value; + } else + args = PyTuple_Pack(1, value); + if (!args) + goto 
bad; + owned_instance = PyObject_Call(type, args, NULL); + Py_DECREF(args); + if (!owned_instance) + goto bad; + value = owned_instance; + if (!PyExceptionInstance_Check(value)) { + PyErr_Format(PyExc_TypeError, + "calling %R should have returned an instance of " + "BaseException, not %R", + type, Py_TYPE(value)); + goto bad; + } + } + } else { + PyErr_SetString(PyExc_TypeError, + "raise: exception class must be a subclass of BaseException"); + goto bad; + } +#if PY_VERSION_HEX >= 0x03030000 + if (cause) { +#else + if (cause && cause != Py_None) { +#endif + PyObject *fixed_cause; + if (cause == Py_None) { + fixed_cause = NULL; + } else if (PyExceptionClass_Check(cause)) { + fixed_cause = PyObject_CallObject(cause, NULL); + if (fixed_cause == NULL) + goto bad; + } else if (PyExceptionInstance_Check(cause)) { + fixed_cause = cause; + Py_INCREF(fixed_cause); + } else { + PyErr_SetString(PyExc_TypeError, + "exception causes must derive from " + "BaseException"); + goto bad; + } + PyException_SetCause(value, fixed_cause); + } + PyErr_SetObject(type, value); + if (tb) { +#if CYTHON_COMPILING_IN_PYPY + PyObject *tmp_type, *tmp_value, *tmp_tb; + PyErr_Fetch(&tmp_type, &tmp_value, &tmp_tb); + Py_INCREF(tb); + PyErr_Restore(tmp_type, tmp_value, tb); + Py_XDECREF(tmp_tb); +#else + PyThreadState *tstate = PyThreadState_GET(); + PyObject* tmp_tb = tstate->curexc_traceback; + if (tb != tmp_tb) { + Py_INCREF(tb); + tstate->curexc_traceback = tb; + Py_XDECREF(tmp_tb); + } +#endif + } +bad: + Py_XDECREF(owned_instance); + return; +} +#endif + +static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Generic(PyObject *o, PyObject* j) { + PyObject *r; + if (!j) return NULL; + r = PyObject_GetItem(o, j); + Py_DECREF(j); + return r; +} +static CYTHON_INLINE PyObject *__Pyx_GetItemInt_List_Fast(PyObject *o, Py_ssize_t i, + CYTHON_NCP_UNUSED int wraparound, + CYTHON_NCP_UNUSED int boundscheck) { +#if CYTHON_COMPILING_IN_CPYTHON + if (wraparound & unlikely(i < 0)) i += PyList_GET_SIZE(o); + if 
((!boundscheck) || likely((0 <= i) & (i < PyList_GET_SIZE(o)))) { + PyObject *r = PyList_GET_ITEM(o, i); + Py_INCREF(r); + return r; + } + return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i)); +#else + return PySequence_GetItem(o, i); +#endif +} +static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Tuple_Fast(PyObject *o, Py_ssize_t i, + CYTHON_NCP_UNUSED int wraparound, + CYTHON_NCP_UNUSED int boundscheck) { +#if CYTHON_COMPILING_IN_CPYTHON + if (wraparound & unlikely(i < 0)) i += PyTuple_GET_SIZE(o); + if ((!boundscheck) || likely((0 <= i) & (i < PyTuple_GET_SIZE(o)))) { + PyObject *r = PyTuple_GET_ITEM(o, i); + Py_INCREF(r); + return r; + } + return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i)); +#else + return PySequence_GetItem(o, i); +#endif +} +static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Fast(PyObject *o, Py_ssize_t i, int is_list, + CYTHON_NCP_UNUSED int wraparound, + CYTHON_NCP_UNUSED int boundscheck) { +#if CYTHON_COMPILING_IN_CPYTHON + if (is_list || PyList_CheckExact(o)) { + Py_ssize_t n = ((!wraparound) | likely(i >= 0)) ? i : i + PyList_GET_SIZE(o); + if ((!boundscheck) || (likely((n >= 0) & (n < PyList_GET_SIZE(o))))) { + PyObject *r = PyList_GET_ITEM(o, n); + Py_INCREF(r); + return r; + } + } + else if (PyTuple_CheckExact(o)) { + Py_ssize_t n = ((!wraparound) | likely(i >= 0)) ? 
i : i + PyTuple_GET_SIZE(o); + if ((!boundscheck) || likely((n >= 0) & (n < PyTuple_GET_SIZE(o)))) { + PyObject *r = PyTuple_GET_ITEM(o, n); + Py_INCREF(r); + return r; + } + } else { + PySequenceMethods *m = Py_TYPE(o)->tp_as_sequence; + if (likely(m && m->sq_item)) { + if (wraparound && unlikely(i < 0) && likely(m->sq_length)) { + Py_ssize_t l = m->sq_length(o); + if (likely(l >= 0)) { + i += l; + } else { + if (PyErr_ExceptionMatches(PyExc_OverflowError)) + PyErr_Clear(); + else + return NULL; + } + } + return m->sq_item(o, i); + } + } +#else + if (is_list || PySequence_Check(o)) { + return PySequence_GetItem(o, i); + } +#endif + return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i)); +} + +static CYTHON_INLINE PyObject *__Pyx_GetModuleGlobalName(PyObject *name) { + PyObject *result; +#if CYTHON_COMPILING_IN_CPYTHON + result = PyDict_GetItem(__pyx_d, name); + if (likely(result)) { + Py_INCREF(result); + } else { +#else + result = PyObject_GetItem(__pyx_d, name); + if (!result) { + PyErr_Clear(); +#endif + result = __Pyx_GetBuiltinName(name); + } + return result; +} + +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg) { + PyObject *self, *result; + PyCFunction cfunc; + cfunc = PyCFunction_GET_FUNCTION(func); + self = PyCFunction_GET_SELF(func); + if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) + return NULL; + result = cfunc(self, arg); + Py_LeaveRecursiveCall(); + if (unlikely(!result) && unlikely(!PyErr_Occurred())) { + PyErr_SetString( + PyExc_SystemError, + "NULL result without error in PyObject_Call"); + } + return result; +} +#endif + +#if CYTHON_COMPILING_IN_CPYTHON +static PyObject* __Pyx__PyObject_CallOneArg(PyObject *func, PyObject *arg) { + PyObject *result; + PyObject *args = PyTuple_New(1); + if (unlikely(!args)) return NULL; + Py_INCREF(arg); + PyTuple_SET_ITEM(args, 0, arg); + result = __Pyx_PyObject_Call(func, args, NULL); + Py_DECREF(args); + 
return result; +} +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg) { +#ifdef __Pyx_CyFunction_USED + if (likely(PyCFunction_Check(func) || PyObject_TypeCheck(func, __pyx_CyFunctionType))) { +#else + if (likely(PyCFunction_Check(func))) { +#endif + if (likely(PyCFunction_GET_FLAGS(func) & METH_O)) { + return __Pyx_PyObject_CallMethO(func, arg); + } + } + return __Pyx__PyObject_CallOneArg(func, arg); +} +#else +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg) { + PyObject *result; + PyObject *args = PyTuple_Pack(1, arg); + if (unlikely(!args)) return NULL; + result = __Pyx_PyObject_Call(func, args, NULL); + Py_DECREF(args); + return result; +} +#endif + +static CYTHON_INLINE void __Pyx_RaiseTooManyValuesError(Py_ssize_t expected) { + PyErr_Format(PyExc_ValueError, + "too many values to unpack (expected %" CYTHON_FORMAT_SSIZE_T "d)", expected); +} + +static CYTHON_INLINE void __Pyx_RaiseNeedMoreValuesError(Py_ssize_t index) { + PyErr_Format(PyExc_ValueError, + "need more than %" CYTHON_FORMAT_SSIZE_T "d value%.1s to unpack", + index, (index == 1) ? 
"" : "s"); +} + +static CYTHON_INLINE int __Pyx_IterFinish(void) { +#if CYTHON_COMPILING_IN_CPYTHON + PyThreadState *tstate = PyThreadState_GET(); + PyObject* exc_type = tstate->curexc_type; + if (unlikely(exc_type)) { + if (likely(exc_type == PyExc_StopIteration) || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration)) { + PyObject *exc_value, *exc_tb; + exc_value = tstate->curexc_value; + exc_tb = tstate->curexc_traceback; + tstate->curexc_type = 0; + tstate->curexc_value = 0; + tstate->curexc_traceback = 0; + Py_DECREF(exc_type); + Py_XDECREF(exc_value); + Py_XDECREF(exc_tb); + return 0; + } else { + return -1; + } + } + return 0; +#else + if (unlikely(PyErr_Occurred())) { + if (likely(PyErr_ExceptionMatches(PyExc_StopIteration))) { + PyErr_Clear(); + return 0; + } else { + return -1; + } + } + return 0; +#endif +} + +static int __Pyx_IternextUnpackEndCheck(PyObject *retval, Py_ssize_t expected) { + if (unlikely(retval)) { + Py_DECREF(retval); + __Pyx_RaiseTooManyValuesError(expected); + return -1; + } else { + return __Pyx_IterFinish(); + } + return 0; +} + +static void* __Pyx_GetVtable(PyObject *dict) { + void* ptr; + PyObject *ob = PyObject_GetItem(dict, __pyx_n_s_pyx_vtable); + if (!ob) + goto bad; +#if PY_VERSION_HEX >= 0x02070000 + ptr = PyCapsule_GetPointer(ob, 0); +#else + ptr = PyCObject_AsVoidPtr(ob); +#endif + if (!ptr && !PyErr_Occurred()) + PyErr_SetString(PyExc_RuntimeError, "invalid vtable found for imported type"); + Py_DECREF(ob); + return ptr; +bad: + Py_XDECREF(ob); + return NULL; +} + +static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level) { + PyObject *empty_list = 0; + PyObject *module = 0; + PyObject *global_dict = 0; + PyObject *empty_dict = 0; + PyObject *list; + #if PY_VERSION_HEX < 0x03030000 + PyObject *py_import; + py_import = __Pyx_PyObject_GetAttrStr(__pyx_b, __pyx_n_s_import); + if (!py_import) + goto bad; + #endif + if (from_list) + list = from_list; + else { + empty_list = PyList_New(0); + if 
(!empty_list) + goto bad; + list = empty_list; + } + global_dict = PyModule_GetDict(__pyx_m); + if (!global_dict) + goto bad; + empty_dict = PyDict_New(); + if (!empty_dict) + goto bad; + { + #if PY_MAJOR_VERSION >= 3 + if (level == -1) { + if (strchr(__Pyx_MODULE_NAME, '.')) { + #if PY_VERSION_HEX < 0x03030000 + PyObject *py_level = PyInt_FromLong(1); + if (!py_level) + goto bad; + module = PyObject_CallFunctionObjArgs(py_import, + name, global_dict, empty_dict, list, py_level, NULL); + Py_DECREF(py_level); + #else + module = PyImport_ImportModuleLevelObject( + name, global_dict, empty_dict, list, 1); + #endif + if (!module) { + if (!PyErr_ExceptionMatches(PyExc_ImportError)) + goto bad; + PyErr_Clear(); + } + } + level = 0; + } + #endif + if (!module) { + #if PY_VERSION_HEX < 0x03030000 + PyObject *py_level = PyInt_FromLong(level); + if (!py_level) + goto bad; + module = PyObject_CallFunctionObjArgs(py_import, + name, global_dict, empty_dict, list, py_level, NULL); + Py_DECREF(py_level); + #else + module = PyImport_ImportModuleLevelObject( + name, global_dict, empty_dict, list, level); + #endif + } + } +bad: + #if PY_VERSION_HEX < 0x03030000 + Py_XDECREF(py_import); + #endif + Py_XDECREF(empty_list); + Py_XDECREF(empty_dict); + return module; +} + +static PyObject* __Pyx_ImportFrom(PyObject* module, PyObject* name) { + PyObject* value = __Pyx_PyObject_GetAttrStr(module, name); + if (unlikely(!value) && PyErr_ExceptionMatches(PyExc_AttributeError)) { + PyErr_Format(PyExc_ImportError, + #if PY_MAJOR_VERSION < 3 + "cannot import name %.230s", PyString_AS_STRING(name)); + #else + "cannot import name %S", name); + #endif + } + return value; +} + +static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line) { + int start = 0, mid = 0, end = count - 1; + if (end >= 0 && code_line > entries[end].code_line) { + return count; + } + while (start < end) { + mid = start + (end - start) / 2; + if (code_line < entries[mid].code_line) { + 
end = mid; + } else if (code_line > entries[mid].code_line) { + start = mid + 1; + } else { + return mid; + } + } + if (code_line <= entries[mid].code_line) { + return mid; + } else { + return mid + 1; + } +} +static PyCodeObject *__pyx_find_code_object(int code_line) { + PyCodeObject* code_object; + int pos; + if (unlikely(!code_line) || unlikely(!__pyx_code_cache.entries)) { + return NULL; + } + pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line); + if (unlikely(pos >= __pyx_code_cache.count) || unlikely(__pyx_code_cache.entries[pos].code_line != code_line)) { + return NULL; + } + code_object = __pyx_code_cache.entries[pos].code_object; + Py_INCREF(code_object); + return code_object; +} +static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object) { + int pos, i; + __Pyx_CodeObjectCacheEntry* entries = __pyx_code_cache.entries; + if (unlikely(!code_line)) { + return; + } + if (unlikely(!entries)) { + entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Malloc(64*sizeof(__Pyx_CodeObjectCacheEntry)); + if (likely(entries)) { + __pyx_code_cache.entries = entries; + __pyx_code_cache.max_count = 64; + __pyx_code_cache.count = 1; + entries[0].code_line = code_line; + entries[0].code_object = code_object; + Py_INCREF(code_object); + } + return; + } + pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line); + if ((pos < __pyx_code_cache.count) && unlikely(__pyx_code_cache.entries[pos].code_line == code_line)) { + PyCodeObject* tmp = entries[pos].code_object; + entries[pos].code_object = code_object; + Py_DECREF(tmp); + return; + } + if (__pyx_code_cache.count == __pyx_code_cache.max_count) { + int new_max = __pyx_code_cache.max_count + 64; + entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Realloc( + __pyx_code_cache.entries, (size_t)new_max*sizeof(__Pyx_CodeObjectCacheEntry)); + if (unlikely(!entries)) { + return; + } + __pyx_code_cache.entries = entries; + __pyx_code_cache.max_count = 
new_max; + } + for (i=__pyx_code_cache.count; i>pos; i--) { + entries[i] = entries[i-1]; + } + entries[pos].code_line = code_line; + entries[pos].code_object = code_object; + __pyx_code_cache.count++; + Py_INCREF(code_object); +} + +#include "compile.h" +#include "frameobject.h" +#include "traceback.h" +static PyCodeObject* __Pyx_CreateCodeObjectForTraceback( + const char *funcname, int c_line, + int py_line, const char *filename) { + PyCodeObject *py_code = 0; + PyObject *py_srcfile = 0; + PyObject *py_funcname = 0; + #if PY_MAJOR_VERSION < 3 + py_srcfile = PyString_FromString(filename); + #else + py_srcfile = PyUnicode_FromString(filename); + #endif + if (!py_srcfile) goto bad; + if (c_line) { + #if PY_MAJOR_VERSION < 3 + py_funcname = PyString_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line); + #else + py_funcname = PyUnicode_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line); + #endif + } + else { + #if PY_MAJOR_VERSION < 3 + py_funcname = PyString_FromString(funcname); + #else + py_funcname = PyUnicode_FromString(funcname); + #endif + } + if (!py_funcname) goto bad; + py_code = __Pyx_PyCode_New( + 0, + 0, + 0, + 0, + 0, + __pyx_empty_bytes, /*PyObject *code,*/ + __pyx_empty_tuple, /*PyObject *consts,*/ + __pyx_empty_tuple, /*PyObject *names,*/ + __pyx_empty_tuple, /*PyObject *varnames,*/ + __pyx_empty_tuple, /*PyObject *freevars,*/ + __pyx_empty_tuple, /*PyObject *cellvars,*/ + py_srcfile, /*PyObject *filename,*/ + py_funcname, /*PyObject *name,*/ + py_line, + __pyx_empty_bytes /*PyObject *lnotab*/ + ); + Py_DECREF(py_srcfile); + Py_DECREF(py_funcname); + return py_code; +bad: + Py_XDECREF(py_srcfile); + Py_XDECREF(py_funcname); + return NULL; +} +static void __Pyx_AddTraceback(const char *funcname, int c_line, + int py_line, const char *filename) { + PyCodeObject *py_code = 0; + PyFrameObject *py_frame = 0; + py_code = __pyx_find_code_object(c_line ? 
/*
 * Range-checked "return" helpers used inside the __Pyx_PyInt_As_<type>()
 * converters.
 *
 * __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, exc) expands
 * to a braced block that always leaves the enclosing function:
 *   - `func_value` is evaluated once into a local of type `func_type`;
 *   - if `target_type` is narrower than `func_type` and the value does not
 *     survive a round trip through `target_type`, then
 *       * with `exc` set, a value of (func_type)-1 together with a pending
 *         Python error returns (target_type)-1,
 *       * otherwise it jumps to `raise_neg_overflow` when the target is
 *         unsigned and the value is negative, else to `raise_overflow`;
 *   - in every other case it returns the value cast to `target_type`.
 *
 * The expansion refers to a variable `is_unsigned` and to the labels
 * `raise_overflow` / `raise_neg_overflow`, so the function that uses it must
 * provide all three.  Because the expansion is a complete block, call sites
 * write it without a trailing semicolon.
 *
 * __PYX_VERIFY_RETURN_INT      -> exc = 0 (func_value cannot signal an error)
 * __PYX_VERIFY_RETURN_INT_EXC  -> exc = 1 (func_value may return -1 with an
 *                                 exception set)
 */
#define __PYX_VERIFY_RETURN_INT(target_type, func_type, func_value)\
    __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, 0)
#define __PYX_VERIFY_RETURN_INT_EXC(target_type, func_type, func_value)\
    __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, 1)
#define __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, exc)\
    {\
        func_type value = func_value;\
        if (sizeof(target_type) < sizeof(func_type)) {\
            if (unlikely(value != (func_type) (target_type) value)) {\
                func_type zero = 0;\
                if (exc && unlikely(value == (func_type)-1 && PyErr_Occurred()))\
                    return (target_type) -1;\
                if (is_unsigned && unlikely(value < zero))\
                    goto raise_neg_overflow;\
                else\
                    goto raise_overflow;\
            }\
        }\
        return (target_type) value;\
    }
(int64_t) 0; + case 1: __PYX_VERIFY_RETURN_INT(int64_t, digit, digits[0]) + case 2: + if (8 * sizeof(int64_t) > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int64_t, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int64_t) >= 2 * PyLong_SHIFT) { + return (int64_t) (((((int64_t)digits[1]) << PyLong_SHIFT) | (int64_t)digits[0])); + } + } + break; + case 3: + if (8 * sizeof(int64_t) > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int64_t, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int64_t) >= 3 * PyLong_SHIFT) { + return (int64_t) (((((((int64_t)digits[2]) << PyLong_SHIFT) | (int64_t)digits[1]) << PyLong_SHIFT) | (int64_t)digits[0])); + } + } + break; + case 4: + if (8 * sizeof(int64_t) > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int64_t, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int64_t) >= 4 * PyLong_SHIFT) { + return (int64_t) (((((((((int64_t)digits[3]) << PyLong_SHIFT) | (int64_t)digits[2]) << PyLong_SHIFT) | (int64_t)digits[1]) << PyLong_SHIFT) | (int64_t)digits[0])); + } + } + break; + } +#endif +#if CYTHON_COMPILING_IN_CPYTHON + if (unlikely(Py_SIZE(x) < 0)) { + goto raise_neg_overflow; + } +#else + { + int result = PyObject_RichCompareBool(x, Py_False, Py_LT); + if (unlikely(result < 0)) + return (int64_t) -1; + if (unlikely(result == 1)) + goto raise_neg_overflow; + } +#endif + if (sizeof(int64_t) <= sizeof(unsigned long)) { + __PYX_VERIFY_RETURN_INT_EXC(int64_t, unsigned long, PyLong_AsUnsignedLong(x)) + } else if (sizeof(int64_t) <= 
sizeof(unsigned PY_LONG_LONG)) { + __PYX_VERIFY_RETURN_INT_EXC(int64_t, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) + } + } else { +#if CYTHON_USE_PYLONG_INTERNALS + const digit* digits = ((PyLongObject*)x)->ob_digit; + switch (Py_SIZE(x)) { + case 0: return (int64_t) 0; + case -1: __PYX_VERIFY_RETURN_INT(int64_t, sdigit, -(sdigit) digits[0]) + case 1: __PYX_VERIFY_RETURN_INT(int64_t, digit, +digits[0]) + case -2: + if (8 * sizeof(int64_t) - 1 > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int64_t, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int64_t) - 1 > 2 * PyLong_SHIFT) { + return (int64_t) (((int64_t)-1)*(((((int64_t)digits[1]) << PyLong_SHIFT) | (int64_t)digits[0]))); + } + } + break; + case 2: + if (8 * sizeof(int64_t) > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int64_t, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int64_t) - 1 > 2 * PyLong_SHIFT) { + return (int64_t) ((((((int64_t)digits[1]) << PyLong_SHIFT) | (int64_t)digits[0]))); + } + } + break; + case -3: + if (8 * sizeof(int64_t) - 1 > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int64_t, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int64_t) - 1 > 3 * PyLong_SHIFT) { + return (int64_t) (((int64_t)-1)*(((((((int64_t)digits[2]) << PyLong_SHIFT) | (int64_t)digits[1]) << PyLong_SHIFT) | (int64_t)digits[0]))); + } + } + break; + case 3: + if (8 * sizeof(int64_t) > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int64_t, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) 
| (unsigned long)digits[0]))) + } else if (8 * sizeof(int64_t) - 1 > 3 * PyLong_SHIFT) { + return (int64_t) ((((((((int64_t)digits[2]) << PyLong_SHIFT) | (int64_t)digits[1]) << PyLong_SHIFT) | (int64_t)digits[0]))); + } + } + break; + case -4: + if (8 * sizeof(int64_t) - 1 > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int64_t, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int64_t) - 1 > 4 * PyLong_SHIFT) { + return (int64_t) (((int64_t)-1)*(((((((((int64_t)digits[3]) << PyLong_SHIFT) | (int64_t)digits[2]) << PyLong_SHIFT) | (int64_t)digits[1]) << PyLong_SHIFT) | (int64_t)digits[0]))); + } + } + break; + case 4: + if (8 * sizeof(int64_t) > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int64_t, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int64_t) - 1 > 4 * PyLong_SHIFT) { + return (int64_t) ((((((((((int64_t)digits[3]) << PyLong_SHIFT) | (int64_t)digits[2]) << PyLong_SHIFT) | (int64_t)digits[1]) << PyLong_SHIFT) | (int64_t)digits[0]))); + } + } + break; + } +#endif + if (sizeof(int64_t) <= sizeof(long)) { + __PYX_VERIFY_RETURN_INT_EXC(int64_t, long, PyLong_AsLong(x)) + } else if (sizeof(int64_t) <= sizeof(PY_LONG_LONG)) { + __PYX_VERIFY_RETURN_INT_EXC(int64_t, PY_LONG_LONG, PyLong_AsLongLong(x)) + } + } + { +#if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray) + PyErr_SetString(PyExc_RuntimeError, + "_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers"); +#else + int64_t val; + PyObject *v = __Pyx_PyNumber_Int(x); + #if PY_MAJOR_VERSION < 3 + if (likely(v) && !PyLong_Check(v)) { + PyObject *tmp = v; + v = 
PyNumber_Long(tmp); + Py_DECREF(tmp); + } + #endif + if (likely(v)) { + int one = 1; int is_little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&val; + int ret = _PyLong_AsByteArray((PyLongObject *)v, + bytes, sizeof(val), + is_little, !is_unsigned); + Py_DECREF(v); + if (likely(!ret)) + return val; + } +#endif + return (int64_t) -1; + } + } else { + int64_t val; + PyObject *tmp = __Pyx_PyNumber_Int(x); + if (!tmp) return (int64_t) -1; + val = __Pyx_PyInt_As_int64_t(tmp); + Py_DECREF(tmp); + return val; + } +raise_overflow: + PyErr_SetString(PyExc_OverflowError, + "value too large to convert to int64_t"); + return (int64_t) -1; +raise_neg_overflow: + PyErr_SetString(PyExc_OverflowError, + "can't convert negative value to int64_t"); + return (int64_t) -1; +} + +static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *x) { + const int neg_one = (int) -1, const_zero = (int) 0; + const int is_unsigned = neg_one > const_zero; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_Check(x))) { + if (sizeof(int) < sizeof(long)) { + __PYX_VERIFY_RETURN_INT(int, long, PyInt_AS_LONG(x)) + } else { + long val = PyInt_AS_LONG(x); + if (is_unsigned && unlikely(val < 0)) { + goto raise_neg_overflow; + } + return (int) val; + } + } else +#endif + if (likely(PyLong_Check(x))) { + if (is_unsigned) { +#if CYTHON_USE_PYLONG_INTERNALS + const digit* digits = ((PyLongObject*)x)->ob_digit; + switch (Py_SIZE(x)) { + case 0: return (int) 0; + case 1: __PYX_VERIFY_RETURN_INT(int, digit, digits[0]) + case 2: + if (8 * sizeof(int) > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int) >= 2 * PyLong_SHIFT) { + return (int) (((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0])); + } + } + break; + case 3: + if (8 * sizeof(int) > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + 
__PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int) >= 3 * PyLong_SHIFT) { + return (int) (((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0])); + } + } + break; + case 4: + if (8 * sizeof(int) > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int) >= 4 * PyLong_SHIFT) { + return (int) (((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0])); + } + } + break; + } +#endif +#if CYTHON_COMPILING_IN_CPYTHON + if (unlikely(Py_SIZE(x) < 0)) { + goto raise_neg_overflow; + } +#else + { + int result = PyObject_RichCompareBool(x, Py_False, Py_LT); + if (unlikely(result < 0)) + return (int) -1; + if (unlikely(result == 1)) + goto raise_neg_overflow; + } +#endif + if (sizeof(int) <= sizeof(unsigned long)) { + __PYX_VERIFY_RETURN_INT_EXC(int, unsigned long, PyLong_AsUnsignedLong(x)) + } else if (sizeof(int) <= sizeof(unsigned PY_LONG_LONG)) { + __PYX_VERIFY_RETURN_INT_EXC(int, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) + } + } else { +#if CYTHON_USE_PYLONG_INTERNALS + const digit* digits = ((PyLongObject*)x)->ob_digit; + switch (Py_SIZE(x)) { + case 0: return (int) 0; + case -1: __PYX_VERIFY_RETURN_INT(int, sdigit, -(sdigit) digits[0]) + case 1: __PYX_VERIFY_RETURN_INT(int, digit, +digits[0]) + case -2: + if (8 * sizeof(int) - 1 > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int) - 
1 > 2 * PyLong_SHIFT) { + return (int) (((int)-1)*(((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case 2: + if (8 * sizeof(int) > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int) - 1 > 2 * PyLong_SHIFT) { + return (int) ((((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case -3: + if (8 * sizeof(int) - 1 > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int) - 1 > 3 * PyLong_SHIFT) { + return (int) (((int)-1)*(((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case 3: + if (8 * sizeof(int) > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int) - 1 > 3 * PyLong_SHIFT) { + return (int) ((((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case -4: + if (8 * sizeof(int) - 1 > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int) - 1 > 4 * PyLong_SHIFT) { + return (int) (((int)-1)*(((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + 
case 4: + if (8 * sizeof(int) > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int) - 1 > 4 * PyLong_SHIFT) { + return (int) ((((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + } +#endif + if (sizeof(int) <= sizeof(long)) { + __PYX_VERIFY_RETURN_INT_EXC(int, long, PyLong_AsLong(x)) + } else if (sizeof(int) <= sizeof(PY_LONG_LONG)) { + __PYX_VERIFY_RETURN_INT_EXC(int, PY_LONG_LONG, PyLong_AsLongLong(x)) + } + } + { +#if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray) + PyErr_SetString(PyExc_RuntimeError, + "_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers"); +#else + int val; + PyObject *v = __Pyx_PyNumber_Int(x); + #if PY_MAJOR_VERSION < 3 + if (likely(v) && !PyLong_Check(v)) { + PyObject *tmp = v; + v = PyNumber_Long(tmp); + Py_DECREF(tmp); + } + #endif + if (likely(v)) { + int one = 1; int is_little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&val; + int ret = _PyLong_AsByteArray((PyLongObject *)v, + bytes, sizeof(val), + is_little, !is_unsigned); + Py_DECREF(v); + if (likely(!ret)) + return val; + } +#endif + return (int) -1; + } + } else { + int val; + PyObject *tmp = __Pyx_PyNumber_Int(x); + if (!tmp) return (int) -1; + val = __Pyx_PyInt_As_int(tmp); + Py_DECREF(tmp); + return val; + } +raise_overflow: + PyErr_SetString(PyExc_OverflowError, + "value too large to convert to int"); + return (int) -1; +raise_neg_overflow: + PyErr_SetString(PyExc_OverflowError, + "can't convert negative value to int"); + return (int) -1; +} + +static CYTHON_INLINE PyObject* __Pyx_PyInt_From_uint64_t(uint64_t value) { + const uint64_t neg_one = (uint64_t) 
-1, const_zero = (uint64_t) 0; + const int is_unsigned = neg_one > const_zero; + if (is_unsigned) { + if (sizeof(uint64_t) < sizeof(long)) { + return PyInt_FromLong((long) value); + } else if (sizeof(uint64_t) <= sizeof(unsigned long)) { + return PyLong_FromUnsignedLong((unsigned long) value); + } else if (sizeof(uint64_t) <= sizeof(unsigned PY_LONG_LONG)) { + return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value); + } + } else { + if (sizeof(uint64_t) <= sizeof(long)) { + return PyInt_FromLong((long) value); + } else if (sizeof(uint64_t) <= sizeof(PY_LONG_LONG)) { + return PyLong_FromLongLong((PY_LONG_LONG) value); + } + } + { + int one = 1; int little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&value; + return _PyLong_FromByteArray(bytes, sizeof(uint64_t), + little, !is_unsigned); + } +} + +static CYTHON_INLINE PyObject* __Pyx_PyInt_From_int(int value) { + const int neg_one = (int) -1, const_zero = (int) 0; + const int is_unsigned = neg_one > const_zero; + if (is_unsigned) { + if (sizeof(int) < sizeof(long)) { + return PyInt_FromLong((long) value); + } else if (sizeof(int) <= sizeof(unsigned long)) { + return PyLong_FromUnsignedLong((unsigned long) value); + } else if (sizeof(int) <= sizeof(unsigned PY_LONG_LONG)) { + return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value); + } + } else { + if (sizeof(int) <= sizeof(long)) { + return PyInt_FromLong((long) value); + } else if (sizeof(int) <= sizeof(PY_LONG_LONG)) { + return PyLong_FromLongLong((PY_LONG_LONG) value); + } + } + { + int one = 1; int little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&value; + return _PyLong_FromByteArray(bytes, sizeof(int), + little, !is_unsigned); + } +} + +static CYTHON_INLINE uint64_t __Pyx_PyInt_As_uint64_t(PyObject *x) { + const uint64_t neg_one = (uint64_t) -1, const_zero = (uint64_t) 0; + const int is_unsigned = neg_one > const_zero; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_Check(x))) { + if 
(sizeof(uint64_t) < sizeof(long)) { + __PYX_VERIFY_RETURN_INT(uint64_t, long, PyInt_AS_LONG(x)) + } else { + long val = PyInt_AS_LONG(x); + if (is_unsigned && unlikely(val < 0)) { + goto raise_neg_overflow; + } + return (uint64_t) val; + } + } else +#endif + if (likely(PyLong_Check(x))) { + if (is_unsigned) { +#if CYTHON_USE_PYLONG_INTERNALS + const digit* digits = ((PyLongObject*)x)->ob_digit; + switch (Py_SIZE(x)) { + case 0: return (uint64_t) 0; + case 1: __PYX_VERIFY_RETURN_INT(uint64_t, digit, digits[0]) + case 2: + if (8 * sizeof(uint64_t) > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(uint64_t, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(uint64_t) >= 2 * PyLong_SHIFT) { + return (uint64_t) (((((uint64_t)digits[1]) << PyLong_SHIFT) | (uint64_t)digits[0])); + } + } + break; + case 3: + if (8 * sizeof(uint64_t) > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(uint64_t, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(uint64_t) >= 3 * PyLong_SHIFT) { + return (uint64_t) (((((((uint64_t)digits[2]) << PyLong_SHIFT) | (uint64_t)digits[1]) << PyLong_SHIFT) | (uint64_t)digits[0])); + } + } + break; + case 4: + if (8 * sizeof(uint64_t) > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(uint64_t, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(uint64_t) >= 4 * PyLong_SHIFT) { + return (uint64_t) (((((((((uint64_t)digits[3]) << PyLong_SHIFT) | (uint64_t)digits[2]) << PyLong_SHIFT) | (uint64_t)digits[1]) << PyLong_SHIFT) | (uint64_t)digits[0])); + } + } + break; + } +#endif 
+#if CYTHON_COMPILING_IN_CPYTHON + if (unlikely(Py_SIZE(x) < 0)) { + goto raise_neg_overflow; + } +#else + { + int result = PyObject_RichCompareBool(x, Py_False, Py_LT); + if (unlikely(result < 0)) + return (uint64_t) -1; + if (unlikely(result == 1)) + goto raise_neg_overflow; + } +#endif + if (sizeof(uint64_t) <= sizeof(unsigned long)) { + __PYX_VERIFY_RETURN_INT_EXC(uint64_t, unsigned long, PyLong_AsUnsignedLong(x)) + } else if (sizeof(uint64_t) <= sizeof(unsigned PY_LONG_LONG)) { + __PYX_VERIFY_RETURN_INT_EXC(uint64_t, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) + } + } else { +#if CYTHON_USE_PYLONG_INTERNALS + const digit* digits = ((PyLongObject*)x)->ob_digit; + switch (Py_SIZE(x)) { + case 0: return (uint64_t) 0; + case -1: __PYX_VERIFY_RETURN_INT(uint64_t, sdigit, -(sdigit) digits[0]) + case 1: __PYX_VERIFY_RETURN_INT(uint64_t, digit, +digits[0]) + case -2: + if (8 * sizeof(uint64_t) - 1 > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(uint64_t, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(uint64_t) - 1 > 2 * PyLong_SHIFT) { + return (uint64_t) (((uint64_t)-1)*(((((uint64_t)digits[1]) << PyLong_SHIFT) | (uint64_t)digits[0]))); + } + } + break; + case 2: + if (8 * sizeof(uint64_t) > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(uint64_t, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(uint64_t) - 1 > 2 * PyLong_SHIFT) { + return (uint64_t) ((((((uint64_t)digits[1]) << PyLong_SHIFT) | (uint64_t)digits[0]))); + } + } + break; + case -3: + if (8 * sizeof(uint64_t) - 1 > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(uint64_t, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) 
+ } else if (8 * sizeof(uint64_t) - 1 > 3 * PyLong_SHIFT) { + return (uint64_t) (((uint64_t)-1)*(((((((uint64_t)digits[2]) << PyLong_SHIFT) | (uint64_t)digits[1]) << PyLong_SHIFT) | (uint64_t)digits[0]))); + } + } + break; + case 3: + if (8 * sizeof(uint64_t) > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(uint64_t, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(uint64_t) - 1 > 3 * PyLong_SHIFT) { + return (uint64_t) ((((((((uint64_t)digits[2]) << PyLong_SHIFT) | (uint64_t)digits[1]) << PyLong_SHIFT) | (uint64_t)digits[0]))); + } + } + break; + case -4: + if (8 * sizeof(uint64_t) - 1 > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(uint64_t, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(uint64_t) - 1 > 4 * PyLong_SHIFT) { + return (uint64_t) (((uint64_t)-1)*(((((((((uint64_t)digits[3]) << PyLong_SHIFT) | (uint64_t)digits[2]) << PyLong_SHIFT) | (uint64_t)digits[1]) << PyLong_SHIFT) | (uint64_t)digits[0]))); + } + } + break; + case 4: + if (8 * sizeof(uint64_t) > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(uint64_t, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(uint64_t) - 1 > 4 * PyLong_SHIFT) { + return (uint64_t) ((((((((((uint64_t)digits[3]) << PyLong_SHIFT) | (uint64_t)digits[2]) << PyLong_SHIFT) | (uint64_t)digits[1]) << PyLong_SHIFT) | (uint64_t)digits[0]))); + } + } + break; + } +#endif + if (sizeof(uint64_t) <= sizeof(long)) { + 
__PYX_VERIFY_RETURN_INT_EXC(uint64_t, long, PyLong_AsLong(x)) + } else if (sizeof(uint64_t) <= sizeof(PY_LONG_LONG)) { + __PYX_VERIFY_RETURN_INT_EXC(uint64_t, PY_LONG_LONG, PyLong_AsLongLong(x)) + } + } + { +#if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray) + PyErr_SetString(PyExc_RuntimeError, + "_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers"); +#else + uint64_t val; + PyObject *v = __Pyx_PyNumber_Int(x); + #if PY_MAJOR_VERSION < 3 + if (likely(v) && !PyLong_Check(v)) { + PyObject *tmp = v; + v = PyNumber_Long(tmp); + Py_DECREF(tmp); + } + #endif + if (likely(v)) { + int one = 1; int is_little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&val; + int ret = _PyLong_AsByteArray((PyLongObject *)v, + bytes, sizeof(val), + is_little, !is_unsigned); + Py_DECREF(v); + if (likely(!ret)) + return val; + } +#endif + return (uint64_t) -1; + } + } else { + uint64_t val; + PyObject *tmp = __Pyx_PyNumber_Int(x); + if (!tmp) return (uint64_t) -1; + val = __Pyx_PyInt_As_uint64_t(tmp); + Py_DECREF(tmp); + return val; + } +raise_overflow: + PyErr_SetString(PyExc_OverflowError, + "value too large to convert to uint64_t"); + return (uint64_t) -1; +raise_neg_overflow: + PyErr_SetString(PyExc_OverflowError, + "can't convert negative value to uint64_t"); + return (uint64_t) -1; +} + +static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value) { + const long neg_one = (long) -1, const_zero = (long) 0; + const int is_unsigned = neg_one > const_zero; + if (is_unsigned) { + if (sizeof(long) < sizeof(long)) { + return PyInt_FromLong((long) value); + } else if (sizeof(long) <= sizeof(unsigned long)) { + return PyLong_FromUnsignedLong((unsigned long) value); + } else if (sizeof(long) <= sizeof(unsigned PY_LONG_LONG)) { + return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value); + } + } else { + if (sizeof(long) <= sizeof(long)) { + return PyInt_FromLong((long) value); + } else if (sizeof(long) <= 
sizeof(PY_LONG_LONG)) { + return PyLong_FromLongLong((PY_LONG_LONG) value); + } + } + { + int one = 1; int little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&value; + return _PyLong_FromByteArray(bytes, sizeof(long), + little, !is_unsigned); + } +} + +static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *x) { + const long neg_one = (long) -1, const_zero = (long) 0; + const int is_unsigned = neg_one > const_zero; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_Check(x))) { + if (sizeof(long) < sizeof(long)) { + __PYX_VERIFY_RETURN_INT(long, long, PyInt_AS_LONG(x)) + } else { + long val = PyInt_AS_LONG(x); + if (is_unsigned && unlikely(val < 0)) { + goto raise_neg_overflow; + } + return (long) val; + } + } else +#endif + if (likely(PyLong_Check(x))) { + if (is_unsigned) { +#if CYTHON_USE_PYLONG_INTERNALS + const digit* digits = ((PyLongObject*)x)->ob_digit; + switch (Py_SIZE(x)) { + case 0: return (long) 0; + case 1: __PYX_VERIFY_RETURN_INT(long, digit, digits[0]) + case 2: + if (8 * sizeof(long) > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) >= 2 * PyLong_SHIFT) { + return (long) (((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0])); + } + } + break; + case 3: + if (8 * sizeof(long) > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) >= 3 * PyLong_SHIFT) { + return (long) (((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0])); + } + } + break; + case 4: + if (8 * sizeof(long) > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, unsigned 
long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) >= 4 * PyLong_SHIFT) { + return (long) (((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0])); + } + } + break; + } +#endif +#if CYTHON_COMPILING_IN_CPYTHON + if (unlikely(Py_SIZE(x) < 0)) { + goto raise_neg_overflow; + } +#else + { + int result = PyObject_RichCompareBool(x, Py_False, Py_LT); + if (unlikely(result < 0)) + return (long) -1; + if (unlikely(result == 1)) + goto raise_neg_overflow; + } +#endif + if (sizeof(long) <= sizeof(unsigned long)) { + __PYX_VERIFY_RETURN_INT_EXC(long, unsigned long, PyLong_AsUnsignedLong(x)) + } else if (sizeof(long) <= sizeof(unsigned PY_LONG_LONG)) { + __PYX_VERIFY_RETURN_INT_EXC(long, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) + } + } else { +#if CYTHON_USE_PYLONG_INTERNALS + const digit* digits = ((PyLongObject*)x)->ob_digit; + switch (Py_SIZE(x)) { + case 0: return (long) 0; + case -1: __PYX_VERIFY_RETURN_INT(long, sdigit, -(sdigit) digits[0]) + case 1: __PYX_VERIFY_RETURN_INT(long, digit, +digits[0]) + case -2: + if (8 * sizeof(long) - 1 > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) { + return (long) (((long)-1)*(((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case 2: + if (8 * sizeof(long) > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) { + return (long) ((((((long)digits[1]) << PyLong_SHIFT) | 
(long)digits[0]))); + } + } + break; + case -3: + if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) { + return (long) (((long)-1)*(((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case 3: + if (8 * sizeof(long) > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) { + return (long) ((((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case -4: + if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) - 1 > 4 * PyLong_SHIFT) { + return (long) (((long)-1)*(((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case 4: + if (8 * sizeof(long) > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) - 1 > 4 * PyLong_SHIFT) { + return (long) ((((((((((long)digits[3]) << PyLong_SHIFT) | 
(long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + } +#endif + if (sizeof(long) <= sizeof(long)) { + __PYX_VERIFY_RETURN_INT_EXC(long, long, PyLong_AsLong(x)) + } else if (sizeof(long) <= sizeof(PY_LONG_LONG)) { + __PYX_VERIFY_RETURN_INT_EXC(long, PY_LONG_LONG, PyLong_AsLongLong(x)) + } + } + { +#if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray) + PyErr_SetString(PyExc_RuntimeError, + "_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers"); +#else + long val; + PyObject *v = __Pyx_PyNumber_Int(x); + #if PY_MAJOR_VERSION < 3 + if (likely(v) && !PyLong_Check(v)) { + PyObject *tmp = v; + v = PyNumber_Long(tmp); + Py_DECREF(tmp); + } + #endif + if (likely(v)) { + int one = 1; int is_little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&val; + int ret = _PyLong_AsByteArray((PyLongObject *)v, + bytes, sizeof(val), + is_little, !is_unsigned); + Py_DECREF(v); + if (likely(!ret)) + return val; + } +#endif + return (long) -1; + } + } else { + long val; + PyObject *tmp = __Pyx_PyNumber_Int(x); + if (!tmp) return (long) -1; + val = __Pyx_PyInt_As_long(tmp); + Py_DECREF(tmp); + return val; + } +raise_overflow: + PyErr_SetString(PyExc_OverflowError, + "value too large to convert to long"); + return (long) -1; +raise_neg_overflow: + PyErr_SetString(PyExc_OverflowError, + "can't convert negative value to long"); + return (long) -1; +} + +static int __Pyx_check_binary_version(void) { + char ctversion[4], rtversion[4]; + PyOS_snprintf(ctversion, 4, "%d.%d", PY_MAJOR_VERSION, PY_MINOR_VERSION); + PyOS_snprintf(rtversion, 4, "%s", Py_GetVersion()); + if (ctversion[0] != rtversion[0] || ctversion[2] != rtversion[2]) { + char message[200]; + PyOS_snprintf(message, sizeof(message), + "compiletime version %s of module '%.100s' " + "does not match runtime version %s", + ctversion, __Pyx_MODULE_NAME, rtversion); + return PyErr_WarnEx(NULL, message, 1); + } + return 0; +} + 
+#ifndef __PYX_HAVE_RT_ImportModule +#define __PYX_HAVE_RT_ImportModule +static PyObject *__Pyx_ImportModule(const char *name) { + PyObject *py_name = 0; + PyObject *py_module = 0; + py_name = __Pyx_PyIdentifier_FromString(name); + if (!py_name) + goto bad; + py_module = PyImport_Import(py_name); + Py_DECREF(py_name); + return py_module; +bad: + Py_XDECREF(py_name); + return 0; +} +#endif + +#ifndef __PYX_HAVE_RT_ImportType +#define __PYX_HAVE_RT_ImportType +static PyTypeObject *__Pyx_ImportType(const char *module_name, const char *class_name, + size_t size, int strict) +{ + PyObject *py_module = 0; + PyObject *result = 0; + PyObject *py_name = 0; + char warning[200]; + Py_ssize_t basicsize; +#ifdef Py_LIMITED_API + PyObject *py_basicsize; +#endif + py_module = __Pyx_ImportModule(module_name); + if (!py_module) + goto bad; + py_name = __Pyx_PyIdentifier_FromString(class_name); + if (!py_name) + goto bad; + result = PyObject_GetAttr(py_module, py_name); + Py_DECREF(py_name); + py_name = 0; + Py_DECREF(py_module); + py_module = 0; + if (!result) + goto bad; + if (!PyType_Check(result)) { + PyErr_Format(PyExc_TypeError, + "%.200s.%.200s is not a type object", + module_name, class_name); + goto bad; + } +#ifndef Py_LIMITED_API + basicsize = ((PyTypeObject *)result)->tp_basicsize; +#else + py_basicsize = PyObject_GetAttrString(result, "__basicsize__"); + if (!py_basicsize) + goto bad; + basicsize = PyLong_AsSsize_t(py_basicsize); + Py_DECREF(py_basicsize); + py_basicsize = 0; + if (basicsize == (Py_ssize_t)-1 && PyErr_Occurred()) + goto bad; +#endif + if (!strict && (size_t)basicsize > size) { + PyOS_snprintf(warning, sizeof(warning), + "%s.%s size changed, may indicate binary incompatibility", + module_name, class_name); + if (PyErr_WarnEx(NULL, warning, 0) < 0) goto bad; + } + else if ((size_t)basicsize != size) { + PyErr_Format(PyExc_ValueError, + "%.200s.%.200s has the wrong size, try recompiling", + module_name, class_name); + goto bad; + } + return (PyTypeObject 
*)result; +bad: + Py_XDECREF(py_module); + Py_XDECREF(result); + return NULL; +} +#endif + +#ifndef __PYX_HAVE_RT_ImportFunction +#define __PYX_HAVE_RT_ImportFunction +static int __Pyx_ImportFunction(PyObject *module, const char *funcname, void (**f)(void), const char *sig) { + PyObject *d = 0; + PyObject *cobj = 0; + union { + void (*fp)(void); + void *p; + } tmp; + d = PyObject_GetAttrString(module, (char *)"__pyx_capi__"); + if (!d) + goto bad; + cobj = PyDict_GetItemString(d, funcname); + if (!cobj) { + PyErr_Format(PyExc_ImportError, + "%.200s does not export expected C function %.200s", + PyModule_GetName(module), funcname); + goto bad; + } +#if PY_VERSION_HEX >= 0x02070000 + if (!PyCapsule_IsValid(cobj, sig)) { + PyErr_Format(PyExc_TypeError, + "C function %.200s.%.200s has wrong signature (expected %.500s, got %.500s)", + PyModule_GetName(module), funcname, sig, PyCapsule_GetName(cobj)); + goto bad; + } + tmp.p = PyCapsule_GetPointer(cobj, sig); +#else + {const char *desc, *s1, *s2; + desc = (const char *)PyCObject_GetDesc(cobj); + if (!desc) + goto bad; + s1 = desc; s2 = sig; + while (*s1 != '\0' && *s1 == *s2) { s1++; s2++; } + if (*s1 != *s2) { + PyErr_Format(PyExc_TypeError, + "C function %.200s.%.200s has wrong signature (expected %.500s, got %.500s)", + PyModule_GetName(module), funcname, sig, desc); + goto bad; + } + tmp.p = PyCObject_AsVoidPtr(cobj);} +#endif + *f = tmp.fp; + if (!(*f)) + goto bad; + Py_DECREF(d); + return 0; +bad: + Py_XDECREF(d); + return -1; +} +#endif + +static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) { + while (t->p) { + #if PY_MAJOR_VERSION < 3 + if (t->is_unicode) { + *t->p = PyUnicode_DecodeUTF8(t->s, t->n - 1, NULL); + } else if (t->intern) { + *t->p = PyString_InternFromString(t->s); + } else { + *t->p = PyString_FromStringAndSize(t->s, t->n - 1); + } + #else + if (t->is_unicode | t->is_str) { + if (t->intern) { + *t->p = PyUnicode_InternFromString(t->s); + } else if (t->encoding) { + *t->p = PyUnicode_Decode(t->s, 
t->n - 1, t->encoding, NULL); + } else { + *t->p = PyUnicode_FromStringAndSize(t->s, t->n - 1); + } + } else { + *t->p = PyBytes_FromStringAndSize(t->s, t->n - 1); + } + #endif + if (!*t->p) + return -1; + ++t; + } + return 0; +} + +static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char* c_str) { + return __Pyx_PyUnicode_FromStringAndSize(c_str, (Py_ssize_t)strlen(c_str)); +} +static CYTHON_INLINE char* __Pyx_PyObject_AsString(PyObject* o) { + Py_ssize_t ignore; + return __Pyx_PyObject_AsStringAndSize(o, &ignore); +} +static CYTHON_INLINE char* __Pyx_PyObject_AsStringAndSize(PyObject* o, Py_ssize_t *length) { +#if CYTHON_COMPILING_IN_CPYTHON && (__PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT) + if ( +#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII + __Pyx_sys_getdefaultencoding_not_ascii && +#endif + PyUnicode_Check(o)) { +#if PY_VERSION_HEX < 0x03030000 + char* defenc_c; + PyObject* defenc = _PyUnicode_AsDefaultEncodedString(o, NULL); + if (!defenc) return NULL; + defenc_c = PyBytes_AS_STRING(defenc); +#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII + { + char* end = defenc_c + PyBytes_GET_SIZE(defenc); + char* c; + for (c = defenc_c; c < end; c++) { + if ((unsigned char) (*c) >= 128) { + PyUnicode_AsASCIIString(o); + return NULL; + } + } + } +#endif + *length = PyBytes_GET_SIZE(defenc); + return defenc_c; +#else + if (__Pyx_PyUnicode_READY(o) == -1) return NULL; +#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII + if (PyUnicode_IS_ASCII(o)) { + *length = PyUnicode_GET_LENGTH(o); + return PyUnicode_AsUTF8(o); + } else { + PyUnicode_AsASCIIString(o); + return NULL; + } +#else + return PyUnicode_AsUTF8AndSize(o, length); +#endif +#endif + } else +#endif +#if (!CYTHON_COMPILING_IN_PYPY) || (defined(PyByteArray_AS_STRING) && defined(PyByteArray_GET_SIZE)) + if (PyByteArray_Check(o)) { + *length = PyByteArray_GET_SIZE(o); + return PyByteArray_AS_STRING(o); + } else +#endif + { + char* result; + int r = 
PyBytes_AsStringAndSize(o, &result, length); + if (unlikely(r < 0)) { + return NULL; + } else { + return result; + } + } +} +static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject* x) { + int is_true = x == Py_True; + if (is_true | (x == Py_False) | (x == Py_None)) return is_true; + else return PyObject_IsTrue(x); +} +static CYTHON_INLINE PyObject* __Pyx_PyNumber_Int(PyObject* x) { + PyNumberMethods *m; + const char *name = NULL; + PyObject *res = NULL; +#if PY_MAJOR_VERSION < 3 + if (PyInt_Check(x) || PyLong_Check(x)) +#else + if (PyLong_Check(x)) +#endif + return __Pyx_NewRef(x); + m = Py_TYPE(x)->tp_as_number; +#if PY_MAJOR_VERSION < 3 + if (m && m->nb_int) { + name = "int"; + res = PyNumber_Int(x); + } + else if (m && m->nb_long) { + name = "long"; + res = PyNumber_Long(x); + } +#else + if (m && m->nb_int) { + name = "int"; + res = PyNumber_Long(x); + } +#endif + if (res) { +#if PY_MAJOR_VERSION < 3 + if (!PyInt_Check(res) && !PyLong_Check(res)) { +#else + if (!PyLong_Check(res)) { +#endif + PyErr_Format(PyExc_TypeError, + "__%.4s__ returned non-%.4s (type %.200s)", + name, name, Py_TYPE(res)->tp_name); + Py_DECREF(res); + return NULL; + } + } + else if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_TypeError, + "an integer is required"); + } + return res; +} +static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) { + Py_ssize_t ival; + PyObject *x; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_CheckExact(b))) { + if (sizeof(Py_ssize_t) >= sizeof(long)) + return PyInt_AS_LONG(b); + else + return PyInt_AsSsize_t(x); + } +#endif + if (likely(PyLong_CheckExact(b))) { + #if CYTHON_USE_PYLONG_INTERNALS + const digit* digits = ((PyLongObject*)b)->ob_digit; + const Py_ssize_t size = Py_SIZE(b); + if (likely(__Pyx_sst_abs(size) <= 1)) { + ival = likely(size) ? 
digits[0] : 0; + if (size == -1) ival = -ival; + return ival; + } else { + switch (size) { + case 2: + if (8 * sizeof(Py_ssize_t) > 2 * PyLong_SHIFT) { + return (Py_ssize_t) (((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case -2: + if (8 * sizeof(Py_ssize_t) > 2 * PyLong_SHIFT) { + return -(Py_ssize_t) (((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case 3: + if (8 * sizeof(Py_ssize_t) > 3 * PyLong_SHIFT) { + return (Py_ssize_t) (((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case -3: + if (8 * sizeof(Py_ssize_t) > 3 * PyLong_SHIFT) { + return -(Py_ssize_t) (((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case 4: + if (8 * sizeof(Py_ssize_t) > 4 * PyLong_SHIFT) { + return (Py_ssize_t) (((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case -4: + if (8 * sizeof(Py_ssize_t) > 4 * PyLong_SHIFT) { + return -(Py_ssize_t) (((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + } + } + #endif + return PyLong_AsSsize_t(b); + } + x = PyNumber_Index(b); + if (!x) return -1; + ival = PyInt_AsSsize_t(x); + Py_DECREF(x); + return ival; +} +static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t ival) { + return PyInt_FromSize_t(ival); +} + + +#endif /* Py_PYTHON_H */ diff --git a/gensim/models/count_words_inner.pyx b/gensim/models/count_words_inner.pyx new file mode 100644 index 0000000000..4ab97387e5 --- /dev/null +++ b/gensim/models/count_words_inner.pyx @@ -0,0 +1,72 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Contributed by Matthew Honnibal +# Copyright (C) 2015 ceded to Radim Rehurek +# Licensed under the GNU LGPL v2.1 - 
http://www.gnu.org/licenses/lgpl.html + + +from cpython cimport PyUnicode_AS_DATA +from cpython cimport PyUnicode_GET_DATA_SIZE +from libc.stdint cimport uint64_t + +from murmurhash.mrmr cimport hash64 +from preshed.counter cimport PreshCounter, count_t + +from collections import defaultdict +from six import iteritems + + +cpdef uint64_t _hash_string(unicode string) except 0: + # This code is copied from spacy.strings. The implementation took some thought, + # and consultation with Stefan Behnel. Do not change blindly. Interaction + # with Python 2/3 is subtle. + chars = PyUnicode_AS_DATA(string) + size = PyUnicode_GET_DATA_SIZE(string) + return hash64(chars, size, 1) + + +cpdef uint64_t _hash_bytes(bytes string) except 0: + chars = string + return hash64(chars, len(string), 1) + + +def count_words_fast(sentences, count_t min_freq, int progress_per, log_progress): + cdef PreshCounter counts = PreshCounter() + strings = {} + sentence_no = -1 + total_words = 0 + cdef uint64_t key + cdef count_t count + for sentence_no, sentence in enumerate(sentences): + if sentence_no % progress_per == 0: + log_progress(sentence_no, total_words, len(strings)) + + for word in sentence: + # There's a likely bug here: we're going to be maintaining separate + # counts for unicode and byte strings, where defaultdict presumably + # hashes these the same, right? + # + # We could convert to one or the other by default, but the performance + # implications are pretty bad. It might be best to merge the counts + # when we form up the final vocab. + if isinstance(word, unicode): + key = _hash_string(word) + elif isinstance(word, bytes): + key = _hash_bytes(word) + else: + raise TypeError(type(word)) + counts.inc(key, 1) + # TODO: Why doesn't .inc return this? 
=/ + count = counts[key] + # Remember the string when we exceed min count + if count == min_freq: + strings[key] = word + total_words += len(sentence) + + # Use defaultdict to match the pure Python version of the function + vocab = defaultdict(int) + for key, word in iteritems(strings): + vocab[word] = counts[key] + return vocab, sentence_no + diff --git a/gensim/models/word2vec.py b/gensim/models/word2vec.py index 4aa2d87742..8737667b94 100755 --- a/gensim/models/word2vec.py +++ b/gensim/models/word2vec.py @@ -96,6 +96,11 @@ logger = logging.getLogger(__name__) +try: + from gensim.models.count_words_inner import count_words_fast +except ImportError: + count_words_fast = None + try: from gensim.models.word2vec_inner import train_batch_sg, train_batch_cbow from gensim.models.word2vec_inner import score_sentence_sg, score_sentence_cbow @@ -504,21 +509,30 @@ def build_vocab(self, sentences, keep_raw_vocab=False, trim_rule=None): def scan_vocab(self, sentences, progress_per=10000, trim_rule=None): """Do an initial scan of all words appearing in sentences.""" + def log_progress(sentence_no, total_words, vocab_size): + logger.info("PROGRESS: at sentence #%i, processed %i words, keeping %i word types", + sentence_no, total_words, vocab_size) + logger.info("collecting all words and their counts") - sentence_no = -1 + total_words = 0 - min_reduce = 1 - vocab = defaultdict(int) - for sentence_no, sentence in enumerate(sentences): - if sentence_no % progress_per == 0: - logger.info("PROGRESS: at sentence #%i, processed %i words, keeping %i word types", - sentence_no, sum(itervalues(vocab)) + total_words, len(vocab)) - for word in sentence: - vocab[word] += 1 - - if self.max_vocab_size and len(vocab) > self.max_vocab_size: - total_words += utils.prune_vocab(vocab, min_reduce, trim_rule=trim_rule) - min_reduce += 1 + # Use the fast version if it exists, and if no trim rule is specified + if count_words_fast is not None and trim_rule is None: + vocab, sentence_no = 
count_words_fast(sentences, self.min_count, + progress_per, log_progress) + else: + sentence_no = -1 + min_reduce = 1 + vocab = defaultdict(int) + for sentence_no, sentence in enumerate(sentences): + if sentence_no % progress_per == 0: + log_progress(sentence_no, sum(itervalues(vocab)) + total_words, len(vocab)) + + for word in sentence: + vocab[word] += 1 + if self.max_vocab_size and len(vocab) > self.max_vocab_size: + total_words += utils.prune_vocab(vocab, min_reduce, trim_rule=trim_rule) + min_reduce += 1 total_words += sum(itervalues(vocab)) logger.info("collected %i word types from a corpus of %i raw words and %i sentences", diff --git a/setup.py b/setup.py index 3d46b6fe18..4509d131fd 100644 --- a/setup.py +++ b/setup.py @@ -127,6 +127,9 @@ def readfile(fname): Extension('gensim.models.doc2vec_inner', sources=['./gensim/models/doc2vec_inner.c'], include_dirs=[model_dir]), + Extension('gensim.models.count_words_inner', + sources=['./gensim/models/count_words_inner.c'], + include_dirs=[model_dir]), ], cmdclass=cmdclass, packages=find_packages(),