Skip to content

Commit

Permalink
Fix serializing replaced dict keys
Browse files Browse the repository at this point in the history
  • Loading branch information
ijl committed Jun 28, 2022
1 parent fadf02a commit 0dbc2d1
Show file tree
Hide file tree
Showing 5 changed files with 77 additions and 138 deletions.
126 changes: 1 addition & 125 deletions src/ffi/dict.rs
Original file line number Diff line number Diff line change
@@ -1,133 +1,9 @@
// SPDX-License-Identifier: (Apache-2.0 OR MIT)

use pyo3_ffi::{PyObject, Py_hash_t, Py_ssize_t};
use std::os::raw::{c_char, c_void};
use pyo3_ffi::{PyDictObject, PyObject, Py_ssize_t};

#[allow(non_snake_case)]
#[inline(always)]
pub unsafe fn PyDict_GET_SIZE(op: *mut PyObject) -> Py_ssize_t {
(*op.cast::<PyDictObject>()).ma_used
}

// dictobject.h
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct PyDictObject {
pub ob_refcnt: pyo3_ffi::Py_ssize_t,
pub ob_type: *mut pyo3_ffi::PyTypeObject,
pub ma_used: pyo3_ffi::Py_ssize_t,
pub ma_version_tag: u64,
pub ma_keys: *mut PyDictKeysObject,
pub ma_values: *mut *mut PyObject,
}

// dict-common.h
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct PyDictKeyEntry {
pub me_hash: Py_hash_t,
pub me_key: *mut PyObject,
pub me_value: *mut PyObject,
}

// dict-common.h
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct PyDictKeysObject {
pub dk_refcnt: Py_ssize_t,
pub dk_size: Py_ssize_t,
pub dk_lookup: *mut c_void, // dict_lookup_func
pub dk_usable: Py_ssize_t,
pub dk_nentries: Py_ssize_t,
pub dk_indices: [c_char; 1],
}

// dictobject.c
#[allow(non_snake_case)]
#[cfg(target_pointer_width = "64")]
fn DK_IXSIZE(dk: *mut PyDictKeysObject) -> isize {
unsafe {
if (*dk).dk_size <= 0xff {
1
} else if (*dk).dk_size <= 0xffff {
2
} else if (*dk).dk_size <= 0xffffffff {
4
} else {
8
}
}
}

// dictobject.c
#[allow(non_snake_case)]
#[cfg(target_pointer_width = "32")]
fn DK_IXSIZE(dk: *mut PyDictKeysObject) -> isize {
unsafe {
if (*dk).dk_size <= 0xff {
1
} else if (*dk).dk_size <= 0xffff {
2
} else {
4
}
}
}

pub struct PyDictIter {
dict_ptr: *mut PyDictObject,
indices_ptr: *mut PyDictKeyEntry,
idx: usize,
len: usize,
}

impl PyDictIter {
pub fn new(dict_ptr: *mut PyDictObject) -> Self {
unsafe {
let dk = (*dict_ptr).ma_keys;
let offset = (*dk).dk_size * DK_IXSIZE(dk);
let indices_ptr = std::mem::transmute::<*mut [c_char; 1], *mut u8>(
std::ptr::addr_of_mut!((*dk).dk_indices),
)
.offset(offset) as *mut PyDictKeyEntry;
let len = PyDict_GET_SIZE(dict_ptr as *mut pyo3_ffi::PyObject) as usize;
PyDictIter {
dict_ptr: dict_ptr,
indices_ptr: indices_ptr,
idx: 0,
len: len,
}
}
}
}

impl Iterator for PyDictIter {
type Item = (*mut pyo3_ffi::PyObject, *mut pyo3_ffi::PyObject);

fn next(&mut self) -> Option<Self::Item> {
unsafe {
if unlikely!(self.idx == self.len) {
None
} else if !(*self.dict_ptr).ma_values.is_null() {
let entry_ptr: *mut PyDictKeyEntry = self.indices_ptr.add(self.idx);
let value = (*(*self.dict_ptr).ma_values).add(self.idx);
self.idx += 1;
Some(((*entry_ptr).me_key, value))
} else {
let mut entry_ptr: *mut PyDictKeyEntry = self.indices_ptr.add(self.idx);
while self.idx < self.len && (*entry_ptr).me_value.is_null() {
entry_ptr = entry_ptr.add(1);
}
let value = (*entry_ptr).me_value;
self.idx += 1;
Some(((*entry_ptr).me_key, value))
}
}
}
}

impl ExactSizeIterator for PyDictIter {
fn len(&self) -> usize {
self.idx - self.len
}
}
7 changes: 6 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,12 @@ pub unsafe extern "C" fn dumps(
}

if !kwds.is_null() {
for (arg, val) in crate::ffi::PyDictIter::new(kwds as *mut crate::ffi::PyDictObject) {
let len = unsafe { crate::ffi::PyDict_GET_SIZE(kwds) };
let mut pos = 0isize;
let mut arg: *mut PyObject = null_mut();
let mut val: *mut PyObject = null_mut();
for _ in 0..=len.saturating_sub(1) {
unsafe { _PyDict_Next(kwds, &mut pos, &mut arg, &mut val, null_mut()) };
if arg == typeref::DEFAULT {
if unlikely!(num_args & 2 == 2) {
return raise_dumps_exception(Cow::Borrowed(
Expand Down
62 changes: 50 additions & 12 deletions src/serialize/dict.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: (Apache-2.0 OR MIT)

use crate::ffi::{PyDictIter, PyDictObject, PyDict_GET_SIZE};
use crate::ffi::PyDict_GET_SIZE;
use crate::opt::*;
use crate::serialize::datetime::*;
use crate::serialize::datetimelike::*;
Expand All @@ -13,6 +13,7 @@ use crate::unicode::*;
use inlinable_string::InlinableString;
use serde::ser::{Serialize, SerializeMap, Serializer};
use smallvec::SmallVec;
use std::ptr::addr_of_mut;
use std::ptr::NonNull;

pub struct Dict {
Expand Down Expand Up @@ -48,8 +49,27 @@ impl Serialize for Dict {
S: Serializer,
{
let mut map = serializer.serialize_map(None).unwrap();
let mut pos = 0isize;
let mut str_size: pyo3_ffi::Py_ssize_t = 0;
for (key, value) in PyDictIter::new(self.ptr as *mut PyDictObject) {
let mut key: *mut pyo3_ffi::PyObject = std::ptr::null_mut();
let mut value: *mut pyo3_ffi::PyObject = std::ptr::null_mut();
for _ in 0..=unsafe { PyDict_GET_SIZE(self.ptr) as usize } - 1 {
unsafe {
pyo3_ffi::_PyDict_Next(
self.ptr,
addr_of_mut!(pos),
addr_of_mut!(key),
addr_of_mut!(value),
std::ptr::null_mut(),
)
};
let value = PyObjectSerializer::new(
value,
self.opts,
self.default_calls,
self.recursion + 1,
self.default,
);
if unlikely!(unsafe { ob_type!(key) != STR_TYPE }) {
err!(SerializeError::KeyMustBeStr)
}
Expand All @@ -60,14 +80,8 @@ impl Serialize for Dict {
}
map.serialize_key(str_from_slice!(data, str_size)).unwrap();
}
let pyvalue = PyObjectSerializer::new(
value,
self.opts,
self.default_calls,
self.recursion + 1,
self.default,
);
map.serialize_value(&pyvalue)?;

map.serialize_value(&value)?;
}
map.end()
}
Expand Down Expand Up @@ -108,8 +122,20 @@ impl Serialize for DictSortedKey {
let len = unsafe { PyDict_GET_SIZE(self.ptr) as usize };
let mut items: SmallVec<[(&str, *mut pyo3_ffi::PyObject); 8]> =
SmallVec::with_capacity(len);
let mut pos = 0isize;
let mut str_size: pyo3_ffi::Py_ssize_t = 0;
for (key, value) in PyDictIter::new(self.ptr as *mut PyDictObject) {
let mut key: *mut pyo3_ffi::PyObject = std::ptr::null_mut();
let mut value: *mut pyo3_ffi::PyObject = std::ptr::null_mut();
for _ in 0..=len - 1 {
unsafe {
pyo3_ffi::_PyDict_Next(
self.ptr,
addr_of_mut!(pos),
addr_of_mut!(key),
addr_of_mut!(value),
std::ptr::null_mut(),
)
};
if unlikely!(unsafe { ob_type!(key) != STR_TYPE }) {
err!(SerializeError::KeyMustBeStr)
}
Expand Down Expand Up @@ -274,9 +300,21 @@ impl Serialize for DictNonStrKey {
let len = unsafe { PyDict_GET_SIZE(self.ptr) as usize };
let mut items: SmallVec<[(InlinableString, *mut pyo3_ffi::PyObject); 8]> =
SmallVec::with_capacity(len);
let mut pos = 0isize;
let mut str_size: pyo3_ffi::Py_ssize_t = 0;
let mut key: *mut pyo3_ffi::PyObject = std::ptr::null_mut();
let mut value: *mut pyo3_ffi::PyObject = std::ptr::null_mut();
let opts = self.opts & NOT_PASSTHROUGH;
for (key, value) in PyDictIter::new(self.ptr as *mut PyDictObject) {
for _ in 0..=len - 1 {
unsafe {
pyo3_ffi::_PyDict_Next(
self.ptr,
addr_of_mut!(pos),
addr_of_mut!(key),
addr_of_mut!(value),
std::ptr::null_mut(),
)
};
if is_type!(ob_type!(key), STR_TYPE) {
let data = read_utf8_from_str(key, &mut str_size);
if unlikely!(data.is_null()) {
Expand Down
2 changes: 2 additions & 0 deletions test/test_issue221.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# SPDX-License-Identifier: (Apache-2.0 OR MIT)

import pytest

import orjson
Expand Down
18 changes: 18 additions & 0 deletions test/test_issue274.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# SPDX-License-Identifier: (Apache-2.0 OR MIT)

import orjson


def test_pop():
data = {"id": "any", "static": "msg"}
data.pop("id")
data["id"] = "new"
# not b'{"static":"msg","static":"msg"}'
assert orjson.dumps(data) == b'{"static":"msg","id":"new"}'


def test_in_place():
# not an issue
data = {"id": "any", "static": "msg"}
data["id"] = "new"
assert orjson.dumps(data) == b'{"id":"new","static":"msg"}'

0 comments on commit 0dbc2d1

Please sign in to comment.