diff --git a/cysimdjson/cysimdjson.pyx b/cysimdjson/cysimdjson.pyx index 44e0129..66b9841 100644 --- a/cysimdjson/cysimdjson.pyx +++ b/cysimdjson/cysimdjson.pyx @@ -1,5 +1,6 @@ # cython: language_level=3 +from libc.stdint cimport uint32_t from libcpp cimport bool from libcpp.string cimport string from cpython.bytes cimport PyBytes_AsStringAndSize @@ -7,73 +8,116 @@ from cython.operator cimport preincrement from cython.operator cimport dereference from cpython.ref cimport PyObject + cdef extern from "string_view" namespace "std": cppclass string_view: pass + cdef extern from "pysimdjson/errors.h": cdef void simdjson_error_handler() -cdef extern from "jsoninter.h" namespace "simdjson_element_type": - cdef simdjson_element_type STRING - cdef simdjson_element_type INT64 - cdef simdjson_element_type DOUBLE - cdef simdjson_element_type UINT64 - cdef simdjson_element_type NULL_VALUE - cdef simdjson_element_type BOOL - cdef simdjson_element_type OBJECT - cdef simdjson_element_type ARRAY +cdef extern from "simdjson/simdjson.h" namespace "simdjson": + + cdef size_t SIMDJSON_MAXSIZE_BYTES + cdef size_t SIMDJSON_PADDING + + cdef enum: + SIMDJSON_VERSION_MAJOR + SIMDJSON_VERSION_MINOR + SIMDJSON_VERSION_REVISION -cdef extern from "jsoninter.h": - cppclass simdjson_object: +cdef extern from "simdjson/simdjson.h" namespace "simdjson::dom": + + cdef cppclass simdjson_object "simdjson::dom::object": + cppclass iterator: iterator() + simdjson_object operator*() iterator operator++() bint operator==(iterator) bint operator!=(iterator) string_view key() + uint32_t key_length() + const char *key_c_str() simdjson_element value() simdjson_object() + iterator begin() iterator end() size_t size() - cppclass simdjson_element_type: - pass + simdjson_element at_pointer(const char*) except +simdjson_error_handler + simdjson_element operator[](const char*) except +simdjson_error_handler - cppclass simdjson_element: - simdjson_element() - simdjson_element_type type() - cppclass simdjson_array: + cdef cppclass simdjson_array "simdjson::dom::array": + cppclass iterator: iterator() - simdjson_element operator*() + operator++() bint operator!=(iterator) + simdjson_element operator*() simdjson_array() + iterator begin() iterator end() size_t size() + size_t number_of_slots() + + # simd_element at(int) except +simdjson_error_handler + # simd_element at_pointer(const char*) except +simdjson_error_handler + + + cdef cppclass simdjson_element "simdjson::dom::element": + + simdjson_element() + + simdjson_element_type type() except +simdjson_error_handler + + const char *get_c_str() except +simdjson_error_handler + size_t get_string_length() except +simdjson_error_handler + + simdjson_array get_array() except +simdjson_error_handler + simdjson_element get_object() except +simdjson_error_handler + + + cdef cppclass simdjson_parser "simdjson::dom::parser": - cppclass simdjson_parser: simdjson_parser() simdjson_parser(size_t max_capacity) + simdjson_element load(string) except + simdjson_error_handler simdjson_element parse(const char * buf, size_t len, bool realloc_if_needed) except + simdjson_error_handler - cdef int getitem_from_element(simdjson_element & element, string & key, simdjson_element & value) except + simdjson_error_handler + +cdef extern from "simdjson/simdjson.h" namespace "simdjson::dom::element_type": + cdef enum simdjson_element_type "simdjson::dom::element_type": + OBJECT, + ARRAY, + STRING, + INT64, + UINT64, + DOUBLE, + BOOL, + NULL_VALUE + + +cdef extern from "jsoninter.h": + + cdef int getitem_from_object(simdjson_object & object, string & key, simdjson_element & value) except + simdjson_error_handler cdef int getitem_from_array(simdjson_array & array, int key, simdjson_element & value) except + simdjson_error_handler - cdef int at_pointer_element(simdjson_element & element, string & key, simdjson_element & value) except + simdjson_error_handler + cdef int at_pointer_object(simdjson_object & element, string & key, simdjson_element & value) except + simdjson_error_handler cdef int at_pointer_array(simdjson_array & array, string & key, simdjson_element & value) except + simdjson_error_handler cdef bool compare_type(simdjson_element_type a, simdjson_element_type b) except + simdjson_error_handler @@ -109,7 +153,7 @@ cdef class JSONArray: return self - def __contains__(JSONElement self, item): + def __contains__(JSONArray self, item): # This is a full scan for i in range(len(self)): if self[i] == item: @@ -142,7 +186,7 @@ cdef class JSONArray: preincrement(it) - def at_pointer(JSONElement self, key): + def at_pointer(JSONArray self, key): cdef simdjson_element v cdef int ok @@ -154,48 +198,45 @@ cdef class JSONArray: return _wrap_element(v) -cdef class JSONElement: +cdef class JSONObject: - cdef simdjson_element Document + cdef simdjson_object Object - def __cinit__(JSONElement self): - self.Document = simdjson_element() + def __cinit__(JSONObject self): + self.Object = simdjson_object() @staticmethod - cdef inline JSONElement build_JSONElement(simdjson_element document): - cdef JSONElement self = JSONElement.__new__(JSONElement) - self.Document = document + cdef inline JSONObject build_JSONObject(simdjson_element value): + cdef JSONObject self = JSONObject.__new__(JSONObject) + cdef int ok + self.Object = to_object(value, &ok) + if ok != 0: + raise ValueError() return self - def __contains__(JSONElement self, key): + def __contains__(JSONObject self, key): cdef simdjson_element v cdef int ok key_raw = key.encode('utf-8') - ok = getitem_from_element(self.Document, key_raw, v) + ok = getitem_from_object(self.Object, key_raw, v) return ok == 0 def __iter__(self): - for _key in self.keys(): yield _key def items(self): - cdef int ok cdef string_view sv cdef simdjson_element v - cdef simdjson_object obj = to_object(self.Document, &ok) - if ok != 0: - raise ValueError() - - cdef simdjson_object.iterator it = obj.begin() - while it != obj.end(): + cdef simdjson_object.iterator it = self.Object.begin() + while it != self.Object.end(): sv = it.key() v = it.value() @@ -203,71 +244,100 @@ cdef class JSONElement: preincrement(it) - def __getitem__(JSONElement self, key): + def __getitem__(JSONObject self, key): cdef simdjson_element v cdef int ok key_raw = key.encode('utf-8') - ok = getitem_from_element(self.Document, key_raw, v) + ok = getitem_from_object(self.Object, key_raw, v) if ok != 0: raise KeyError("Not found '{}'".format(key)) return _wrap_element(v) - def __len__(JSONElement self): + def __len__(JSONObject self): cdef int ok cdef string_view sv - cdef simdjson_object obj = to_object(self.Document, &ok) - if ok != 0: - raise ValueError() - - return obj.size() + return self.Object.size() - def keys(JSONElement self): + def keys(JSONObject self): cdef int ok cdef string_view sv - cdef simdjson_object obj = to_object(self.Document, &ok) - if ok != 0: - raise ValueError() - - cdef simdjson_object.iterator it = obj.begin() - while it != obj.end(): + cdef simdjson_object.iterator it = self.Object.begin() + while it != self.Object.end(): sv = it.key() yield string_view_to_python_string(sv) preincrement(it) - def at_pointer(JSONElement self, key): + def at_pointer(JSONObject self, key): cdef simdjson_element v cdef int ok key_raw = key.encode('utf-8') - ok = at_pointer_element(self.Document, key_raw, v) + ok = at_pointer_object(self.Object, key_raw, v) if ok != 0: raise KeyError("Not found '{}'".format(key)) return _wrap_element(v) -cdef class JSONDocument(JSONElement): +cdef class JSONObjectDocument(JSONObject): + ''' + Represents a top-level JSON object (dictionary). + ''' cdef object Data + cdef simdjson_element Element - def __cinit__(JSONDocument self): + def __cinit__(JSONObjectDocument self): self.Data = None - @staticmethod - cdef inline JSONDocument build_JSONDocument(simdjson_element document, object data): - cdef JSONDocument self = JSONDocument.__new__(JSONDocument) - self.Document = document - self.Data = data - return self +cdef inline JSONObjectDocument _build_JSONObjectDocument(simdjson_element element, object data): + cdef JSONObjectDocument self = JSONObjectDocument.__new__(JSONObjectDocument) + + cdef int ok + self.Object = to_object(element, &ok) + if ok != 0: + raise ValueError("Not an JSON object.") + + self.Element = element + self.Data = data + + return self + + +cdef class JSONArrayDocument(JSONArray): + ''' + Represents a top-level JSON array. + ''' + + cdef object Data + cdef simdjson_element Element + + + def __cinit__(JSONArrayDocument self): + self.Data = None + + +cdef inline JSONArrayDocument _build_JSONArrayDocument(simdjson_element element, object data): + cdef JSONArrayDocument self = JSONArrayDocument.__new__(JSONArrayDocument) + + cdef int ok + self.Array = to_array(element, &ok) + if ok != 0: + raise ValueError("Not an JSON array.") + + self.Element = element + self.Data = data + + return self cdef class JSONParser: @@ -276,7 +346,7 @@ cdef class JSONParser: simdjson_parser Parser - def __cinit__(self, max_capacity = None): + def __cinit__(self, max_capacity=None): if max_capacity is not None: self.Parser = simdjson_parser.simdjson_parser(int(max_capacity)) else: @@ -290,9 +360,8 @@ cdef class JSONParser: if rc == -1: raise RuntimeError("Failed to get raw data") - cdef simdjson_element doc = self.Parser.parse(data_ptr, pysize, 1) - - return JSONDocument.build_JSONDocument(doc, event) + cdef simdjson_element element = self.Parser.parse(data_ptr, pysize, 1) + return self._build(element, event) def parse_in_place(self, event): @@ -306,20 +375,32 @@ cdef class JSONParser: if rc == -1: raise RuntimeError("Failed to get raw data") - cdef simdjson_element doc = self.Parser.parse(data_ptr, pysize, 0) + cdef simdjson_element element = self.Parser.parse(data_ptr, pysize, 0) + return self._build(element, event) - return JSONDocument.build_JSONDocument(doc, event) def load(self, path): - cdef simdjson_element doc = self.Parser.load(path) - return JSONDocument.build_JSONDocument(doc, None) + cdef simdjson_element element = self.Parser.load(path) + return self._build(element, None) + + + cdef _build(self, simdjson_element element, event): + cdef simdjson_element_type et = element.type() + + if compare_type(et, OBJECT): + return _build_JSONObjectDocument(element, event) + + elif compare_type(et, ARRAY): + return _build_JSONArrayDocument(element, event) + + else: + return _wrap_element(element) def active_implementation(self): return get_active_implementation() - cdef inline object _wrap_element(simdjson_element v): cdef int ok cdef simdjson_element_type et = v.type() @@ -364,9 +445,19 @@ cdef inline object _wrap_element(simdjson_element v): return o if compare_type(et, OBJECT): - return JSONElement.build_JSONElement(v) + return JSONObject.build_JSONObject(v) if compare_type(et, ARRAY): return JSONArray.build_JSONArray(v) raise ValueError() + + +MAXSIZE_BYTES = SIMDJSON_MAXSIZE_BYTES +PADDING = SIMDJSON_PADDING + +SIMDJSON_VERSION = "{}.{}.{}".format( + SIMDJSON_VERSION_MAJOR, + SIMDJSON_VERSION_MINOR, + SIMDJSON_VERSION_REVISION +) diff --git a/cysimdjson/jsoninter.h b/cysimdjson/jsoninter.h index 091a740..5372b3c 100644 --- a/cysimdjson/jsoninter.h +++ b/cysimdjson/jsoninter.h @@ -3,16 +3,9 @@ using namespace simdjson; -// To avoid naming collisions between dom::object and python namespace -using simdjson_object = dom::object; -using simdjson_element_type = dom::element_type; -using simdjson_element = dom::element; -using simdjson_array = dom::array; -using simdjson_parser = dom::parser; - -inline int getitem_from_element(dom::element & element, const std::string & key, dom::element & value) { - auto error = element[key].get(value); +inline int getitem_from_object(dom::object & obj, const std::string & key, dom::element & value) { + auto error = obj[key].get(value); if (error) { return -1; } @@ -30,8 +23,8 @@ inline int getitem_from_array(dom::array & array, int key, dom::element & value) } -inline int at_pointer_element(dom::element & element, std::string & key, dom::element & value) { - auto error = element.at_pointer(key).get(value); +inline int at_pointer_object(dom::object & obj, std::string & key, dom::element & value) { + auto error = obj.at_pointer(key).get(value); if (error) { return -1; } diff --git a/cysimdjson/ondemand/README.md b/cysimdjson/ondemand/README.md deleted file mode 100644 index f432ce7..0000000 --- a/cysimdjson/ondemand/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# PoC for new on-demand implementation in SIMDJSON. - -**Currently not used.** diff --git a/cysimdjson/ondemand/jsoninter.h b/cysimdjson/ondemand/jsoninter.h deleted file mode 100644 index 5312b5c..0000000 --- a/cysimdjson/ondemand/jsoninter.h +++ /dev/null @@ -1,26 +0,0 @@ -#include "simdjson.h" -using namespace simdjson; - -inline int get(ondemand::document & document, const std::string & key, ondemand::value & value) { - auto error = document[key].get(value); - if (error) { - return -1; - } - return 0; -} - - -inline int get_string_view(ondemand::value & value, std::string_view & dst) { - auto error = value.get_string().get(dst); - if (error) { - std::cerr << error << std::endl; - return -1; - } - return 0; -} - - -inline std::string to_string(std::string_view sv) { - //TODO: This creates a string copy - return std::string(sv); -} diff --git a/cysimdjson/ondemand/simdjsonpy.pyx b/cysimdjson/ondemand/simdjsonpy.pyx deleted file mode 100644 index 6ff4ab7..0000000 --- a/cysimdjson/ondemand/simdjsonpy.pyx +++ /dev/null @@ -1,85 +0,0 @@ -# cython: language_level=3 - -from libcpp.string cimport string -from libc.stdint cimport int64_t - - -cdef extern from "simdjson.h" namespace "simdjson": - - cppclass padded_string: - padded_string() - padded_string(string) - - - -cdef extern from "simdjson.h" namespace "simdjson::ondemand": - - cppclass simdjson_result: - simdjson_result get(string_view) - - cppclass value: - simdjson_result get_string() - - cppclass document: - pass - - cppclass parser: - parser() - document iterate(padded_string) - - -cdef extern from "jsoninter.h": - cdef int get(document & document, string & key, value & value) - cdef int get_string_view(value & value, string_view & dst) - cdef string to_string(string_view sv) - -cdef extern from "string_view" namespace "std": - cppclass string_view: - pass - - -cdef class JSONDocument: - - cdef document Document - cdef padded_string JSONps - - def __cinit__(JSONDocument self, JSONParser parser, event): - self.JSONps = padded_string(event) - self.Document = parser.Parser.iterate(self.JSONps) - - - def __contains__(JSONDocument self, key): - cdef value v - cdef int ok - key_raw = key.encode('utf-8') - ok = get(self.Document, key_raw, v) - return ok == 0 - - - def __getitem__(JSONDocument self, key): - cdef value v - cdef int ok - - key_raw = key.encode('utf-8') - ok = get(self.Document, key_raw, v) - if ok != 0: - raise KeyError("Not found '{}'".format(key)) - - # TODO: Once value has type() method, check the type and decide what to return - # https://github.com/simdjson/simdjson/pull/1472 - - cdef string_view v_str - ok = get_string_view(v, v_str) - if ok != 0: - raise ValueError() - - return to_string(v_str).decode('utf-8') - - -cdef class JSONParser: - - cdef: - parser Parser - - def process(self, event): - return JSONDocument(self, event) diff --git a/cysimdjson/pysimdjson/errors.cpp b/cysimdjson/pysimdjson/errors.cpp index 11bbf1a..7d3c8e4 100644 --- a/cysimdjson/pysimdjson/errors.cpp +++ b/cysimdjson/pysimdjson/errors.cpp @@ -11,8 +11,7 @@ * * simd_element at(int) except +simdjson_error_handler */ -void -simdjson_error_handler() { +void simdjson_error_handler() { using namespace simdjson; try { diff --git a/test/test_array/__init__.py b/test/test_array/__init__.py index 31fdefc..f856d12 100644 --- a/test/test_array/__init__.py +++ b/test/test_array/__init__.py @@ -8,6 +8,7 @@ class JSONArrayTestCases(unittest.TestCase): + def test_iter_01(self): parser = cysimdjson.JSONParser() @@ -19,3 +20,14 @@ def test_iter_01(self): self.assertEqual(len(ar), 10) for i, n in enumerate(ar, 1): self.assertEqual(i, n) + + + def test_iter_02(self): + + parser = cysimdjson.JSONParser() + + with open(os.path.join(THIS_DIR, 'top_array.json'), 'rb') as fo: + json_parsed = parser.parse(fo.read()) + + for i, n in enumerate(json_parsed, 1): + self.assertEqual(i, n) diff --git a/test/test_array/top_array.json b/test/test_array/top_array.json new file mode 100644 index 0000000..9063324 --- /dev/null +++ b/test/test_array/top_array.json @@ -0,0 +1 @@ +[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]