From b04f9c9467e69e3d5887b17bee50bf480c7e9b00 Mon Sep 17 00:00:00 2001 From: Erick Friis Date: Mon, 18 Nov 2024 12:54:02 -0800 Subject: [PATCH 01/11] orjson optional wip --- python/langsmith/_internal/_operations.py | 9 +-- python/langsmith/_internal/_orjson.py | 66 ++++++++++++++++++++++ python/langsmith/_internal/_serde.py | 18 +++--- python/langsmith/_testing.py | 4 +- python/langsmith/client.py | 25 ++++---- python/poetry.lock | 4 +- python/pyproject.toml | 2 +- python/tests/unit_tests/test_client.py | 9 +-- python/tests/unit_tests/test_operations.py | 8 +-- 9 files changed, 107 insertions(+), 38 deletions(-) create mode 100644 python/langsmith/_internal/_orjson.py diff --git a/python/langsmith/_internal/_operations.py b/python/langsmith/_internal/_operations.py index e1e99d6e2..4d7fd16ec 100644 --- a/python/langsmith/_internal/_operations.py +++ b/python/langsmith/_internal/_operations.py @@ -5,7 +5,8 @@ import uuid from typing import Literal, Optional, Union, cast -import orjson + +from langsmith._internal import _orjson from langsmith import schemas as ls_schemas from langsmith._internal._multipart import MultipartPart, MultipartPartsAndContext @@ -169,12 +170,12 @@ def combine_serialized_queue_operations( if op._none is not None and op._none != create_op._none: # TODO optimize this more - this would currently be slowest # for large payloads - create_op_dict = orjson.loads(create_op._none) + create_op_dict = _orjson.loads(create_op._none) op_dict = { - k: v for k, v in orjson.loads(op._none).items() if v is not None + k: v for k, v in _orjson.loads(op._none).items() if v is not None } create_op_dict.update(op_dict) - create_op._none = orjson.dumps(create_op_dict) + create_op._none = _orjson.dumps(create_op_dict) if op.inputs is not None: create_op.inputs = op.inputs diff --git a/python/langsmith/_internal/_orjson.py b/python/langsmith/_internal/_orjson.py new file mode 100644 index 000000000..c0b10796f --- /dev/null +++ b/python/langsmith/_internal/_orjson.py @@ -0,0 +1,66 @@ +""" +Stubs for orjson operations, compatible with PyPy via a json fallback +""" + +try: + from orjson import ( + OPT_NON_STR_KEYS, + OPT_SERIALIZE_DATACLASS, + OPT_SERIALIZE_NUMPY, + OPT_SERIALIZE_UUID, + Fragment, + JSONDecodeError, + dumps, + loads, + ) + +except ImportError: + import json + from typing import Any, Optional, Callable + + OPT_NON_STR_KEYS = 1 + OPT_SERIALIZE_DATACLASS = 2 + OPT_SERIALIZE_NUMPY = 4 + OPT_SERIALIZE_UUID = 8 + + class Fragment: + def __init__(self, payloadb: bytes): + self.payloadb = payloadb + + from json import JSONDecodeError + + def dumps( + obj: Any, + *, + default: Optional[Callable[[Any], Any]] = None, + option: int = 0, + ) -> bytes: + + class CustomEncoder(json.JSONEncoder): + def encode(o: Any) -> str: + if isinstance(o, Fragment): + return o.payloadb.decode("utf-8") + return super().encode(o) + + def default(o: Any) -> Any: + if default is not None: + return default(o) + # TODO: handle OPT_ keys + return super().default(o) + + return json.dumps(obj, cls=CustomEncoder).encode("utf-8") + + def loads(payload: bytes) -> Any: + return json.loads(payload) + + +__all__ = [ + "loads", + "dumps", + "Fragment", + "JSONDecodeError", + "OPT_SERIALIZE_NUMPY", + "OPT_SERIALIZE_DATACLASS", + "OPT_SERIALIZE_UUID", + "OPT_NON_STR_KEYS", +] diff --git a/python/langsmith/_internal/_serde.py b/python/langsmith/_internal/_serde.py index e77f7319d..1bf8865c1 100644 --- a/python/langsmith/_internal/_serde.py +++ b/python/langsmith/_internal/_serde.py @@ -12,7 +12,7 @@ import uuid from typing import Any -import orjson +from langsmith._internal import _orjson try: from zoneinfo import ZoneInfo # type: ignore[import-not-found] @@ -133,13 +133,13 @@ def dumps_json(obj: Any) -> bytes: The JSON formatted string. """ try: - return orjson.dumps( + return _orjson.dumps( obj, default=_serialize_json, - option=orjson.OPT_SERIALIZE_NUMPY - | orjson.OPT_SERIALIZE_DATACLASS - | orjson.OPT_SERIALIZE_UUID - | orjson.OPT_NON_STR_KEYS, + option=_orjson.OPT_SERIALIZE_NUMPY + | _orjson.OPT_SERIALIZE_DATACLASS + | _orjson.OPT_SERIALIZE_UUID + | _orjson.OPT_NON_STR_KEYS, ) except TypeError as e: # Usually caused by UTF surrogate characters @@ -150,9 +150,9 @@ def dumps_json(obj: Any) -> bytes: ensure_ascii=True, ).encode("utf-8") try: - result = orjson.dumps( - orjson.loads(result.decode("utf-8", errors="surrogateescape")) + result = _orjson.dumps( + _orjson.loads(result.decode("utf-8", errors="surrogateescape")) ) - except orjson.JSONDecodeError: + except _orjson.JSONDecodeError: result = _elide_surrogates(result) return result diff --git a/python/langsmith/_testing.py b/python/langsmith/_testing.py index 8dd72fbcb..9eaa0877f 100644 --- a/python/langsmith/_testing.py +++ b/python/langsmith/_testing.py @@ -12,7 +12,6 @@ from pathlib import Path from typing import Any, Callable, Optional, Sequence, Tuple, TypeVar, overload -import orjson from typing_extensions import TypedDict from langsmith import client as ls_client @@ -21,6 +20,7 @@ from langsmith import run_trees as rt from langsmith import schemas as ls_schemas from langsmith import utils as ls_utils +from langsmith._internal import _orjson try: import pytest # type: ignore @@ -374,7 +374,7 @@ def _serde_example_values(values: VT) -> VT: if values is None: return values bts = ls_client._dumps_json(values) - return orjson.loads(bts) + return _orjson.loads(bts) class _LangSmithTestSuite: diff --git a/python/langsmith/client.py b/python/langsmith/client.py index eb397b4c4..297410605 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -55,7 +55,8 @@ ) from urllib import parse as urllib_parse -import orjson + +from langsmith._internal import _orjson import requests from requests import adapters as requests_adapters from requests_toolbelt import ( # type: ignore[import-untyped] @@ -1252,7 +1253,7 @@ def _hide_run_inputs(self, inputs: dict): if self._hide_inputs is True: return {} if self._anonymizer: - json_inputs = orjson.loads(_dumps_json(inputs)) + json_inputs = _orjson.loads(_dumps_json(inputs)) return self._anonymizer(json_inputs) if self._hide_inputs is False: return inputs @@ -1262,7 +1263,7 @@ def _hide_run_outputs(self, outputs: dict): if self._hide_outputs is True: return {} if self._anonymizer: - json_outputs = orjson.loads(_dumps_json(outputs)) + json_outputs = _orjson.loads(_dumps_json(outputs)) return self._anonymizer(json_outputs) if self._hide_outputs is False: return outputs @@ -1282,20 +1283,20 @@ def _batch_ingest_run_ops( # form the partial body and ids for op in ops: if isinstance(op, SerializedRunOperation): - curr_dict = orjson.loads(op._none) + curr_dict = _orjson.loads(op._none) if op.inputs: - curr_dict["inputs"] = orjson.Fragment(op.inputs) + curr_dict["inputs"] = _orjson.Fragment(op.inputs) if op.outputs: - curr_dict["outputs"] = orjson.Fragment(op.outputs) + curr_dict["outputs"] = _orjson.Fragment(op.outputs) if op.events: - curr_dict["events"] = orjson.Fragment(op.events) + curr_dict["events"] = _orjson.Fragment(op.events) if op.attachments: logger.warning( "Attachments are not supported when use_multipart_endpoint " "is False" ) ids_and_partial_body[op.operation].append( - (f"trace={op.trace_id},id={op.id}", orjson.dumps(curr_dict)) + (f"trace={op.trace_id},id={op.id}", _orjson.dumps(curr_dict)) ) elif isinstance(op, SerializedFeedbackOperation): logger.warning( @@ -1321,7 +1322,7 @@ def _batch_ingest_run_ops( and body_size + len(body_deque[0][1]) > size_limit_bytes ): self._post_batch_ingest_runs( - orjson.dumps(body_chunks), + _orjson.dumps(body_chunks), _context=f"\n{key}: {'; '.join(context_ids[key])}", ) body_size = 0 @@ -1329,12 +1330,12 @@ def _batch_ingest_run_ops( context_ids.clear() curr_id, curr_body = body_deque.popleft() body_size += len(curr_body) - body_chunks[key].append(orjson.Fragment(curr_body)) + body_chunks[key].append(_orjson.Fragment(curr_body)) context_ids[key].append(curr_id) if body_size: context = "; ".join(f"{k}: {'; '.join(v)}" for k, v in context_ids.items()) self._post_batch_ingest_runs( - orjson.dumps(body_chunks), _context="\n" + context + _orjson.dumps(body_chunks), _context="\n" + context ) def batch_ingest_runs( @@ -2759,7 +2760,7 @@ def create_dataset( "POST", "/datasets", headers={**self._headers, "Content-Type": "application/json"}, - data=orjson.dumps(dataset), + data=_orjson.dumps(dataset), ) ls_utils.raise_for_status_with_text(response) diff --git a/python/poetry.lock b/python/poetry.lock index a2e1c3667..2b362f986 100644 --- a/python/poetry.lock +++ b/python/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. [[package]] name = "annotated-types" @@ -2070,4 +2070,4 @@ vcr = [] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0" -content-hash = "ca8fa5c9a82d58bea646d5e7e1089175111ddec2c24cd0b19920d1afd4dd93da" +content-hash = "a5a6c61cba1b5ce9cf739700a780c2df63ff7aaa482c29de9910418263318586" diff --git a/python/pyproject.toml b/python/pyproject.toml index 81645c912..0278d6ddc 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -31,7 +31,7 @@ pydantic = [ { version = "^2.7.4", python = ">=3.12.4" }, ] requests = "^2" -orjson = "^3.9.14" +orjson = { version = "^3.9.14", markers = "platform_python_implementation != 'PyPy'" } httpx = ">=0.23.0,<1" requests-toolbelt = "^1.0.0" diff --git a/python/tests/unit_tests/test_client.py b/python/tests/unit_tests/test_client.py index 5dc1bbe1e..381d7d748 100644 --- a/python/tests/unit_tests/test_client.py +++ b/python/tests/unit_tests/test_client.py @@ -22,7 +22,8 @@ from unittest.mock import MagicMock, patch import dataclasses_json -import orjson + +from langsmith._internal import _orjson import pytest import requests from multipart import MultipartParser, MultipartPart, parse_options_header @@ -848,7 +849,7 @@ class MyNamedTuple(NamedTuple): "set_with_class": set([MyClass(1)]), "my_mock": MagicMock(text="Hello, world"), } - res = orjson.loads(_dumps_json(to_serialize)) + res = _orjson.loads(_dumps_json(to_serialize)) assert ( "model_dump" not in caplog.text ), f"Unexpected error logs were emitted: {caplog.text}" @@ -898,7 +899,7 @@ def __repr__(self) -> str: my_cyclic = CyclicClass(other=CyclicClass(other=None)) my_cyclic.other.other = my_cyclic # type: ignore - res = orjson.loads(_dumps_json({"cyclic": my_cyclic})) + res = _orjson.loads(_dumps_json({"cyclic": my_cyclic})) assert res == {"cyclic": "my_cycles..."} expected = {"foo": "foo", "bar": 1} @@ -1142,7 +1143,7 @@ def test_batch_ingest_run_splits_large_batches( op for call in mock_session.request.call_args_list for reqs in ( - orjson.loads(call[1]["data"]).values() if call[0][0] == "POST" else [] + _orjson.loads(call[1]["data"]).values() if call[0][0] == "POST" else [] ) for op in reqs ] diff --git a/python/tests/unit_tests/test_operations.py b/python/tests/unit_tests/test_operations.py index a6b5cdeb3..5ac839861 100644 --- a/python/tests/unit_tests/test_operations.py +++ b/python/tests/unit_tests/test_operations.py @@ -1,4 +1,4 @@ -import orjson +from langsmith._internal import _orjson from langsmith._internal._operations import ( SerializedFeedbackOperation, @@ -14,7 +14,7 @@ def test_combine_serialized_queue_operations(): operation="post", id="id1", trace_id="trace_id1", - _none=orjson.dumps({"a": 1}), + _none=_orjson.dumps({"a": 1}), inputs="inputs1", outputs="outputs1", events="events1", @@ -24,7 +24,7 @@ def test_combine_serialized_queue_operations(): operation="patch", id="id1", trace_id="trace_id1", - _none=orjson.dumps({"b": "2"}), + _none=_orjson.dumps({"b": "2"}), inputs="inputs1-patched", outputs="outputs1-patched", events="events1", @@ -87,7 +87,7 @@ def test_combine_serialized_queue_operations(): operation="post", id="id1", trace_id="trace_id1", - _none=orjson.dumps({"a": 1, "b": "2"}), + _none=_orjson.dumps({"a": 1, "b": "2"}), inputs="inputs1-patched", outputs="outputs1-patched", events="events1", From bc92a2ddb6ef75e125eaf6eb673e6efa773b067b Mon Sep 17 00:00:00 2001 From: Erick Friis Date: Mon, 18 Nov 2024 12:55:24 -0800 Subject: [PATCH 02/11] x --- python/langsmith/_internal/_operations.py | 4 +--- python/langsmith/_internal/_orjson.py | 7 ++----- python/langsmith/client.py | 3 +-- python/tests/unit_tests/test_client.py | 3 +-- python/tests/unit_tests/test_operations.py | 1 - 5 files changed, 5 insertions(+), 13 deletions(-) diff --git a/python/langsmith/_internal/_operations.py b/python/langsmith/_internal/_operations.py index 4d7fd16ec..66decff0f 100644 --- a/python/langsmith/_internal/_operations.py +++ b/python/langsmith/_internal/_operations.py @@ -5,10 +5,8 @@ import uuid from typing import Literal, Optional, Union, cast - -from langsmith._internal import _orjson - from langsmith import schemas as ls_schemas +from langsmith._internal import _orjson from langsmith._internal._multipart import MultipartPart, MultipartPartsAndContext from langsmith._internal._serde import dumps_json as _dumps_json diff --git a/python/langsmith/_internal/_orjson.py b/python/langsmith/_internal/_orjson.py index c0b10796f..20974a0d8 100644 --- a/python/langsmith/_internal/_orjson.py +++ b/python/langsmith/_internal/_orjson.py @@ -1,6 +1,4 @@ -""" -Stubs for orjson operations, compatible with PyPy via a json fallback -""" +"""Stubs for orjson operations, compatible with PyPy via a json fallback.""" try: from orjson import ( @@ -16,7 +14,7 @@ except ImportError: import json - from typing import Any, Optional, Callable + from typing import Any, Callable, Optional OPT_NON_STR_KEYS = 1 OPT_SERIALIZE_DATACLASS = 2 @@ -35,7 +33,6 @@ def dumps( default: Optional[Callable[[Any], Any]] = None, option: int = 0, ) -> bytes: - class CustomEncoder(json.JSONEncoder): def encode(o: Any) -> str: if isinstance(o, Fragment): diff --git a/python/langsmith/client.py b/python/langsmith/client.py index 297410605..65ad8a159 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -55,8 +55,6 @@ ) from urllib import parse as urllib_parse - -from langsmith._internal import _orjson import requests from requests import adapters as requests_adapters from requests_toolbelt import ( # type: ignore[import-untyped] @@ -70,6 +68,7 @@ from langsmith import env as ls_env from langsmith import schemas as ls_schemas from langsmith import utils as ls_utils +from langsmith._internal import _orjson from langsmith._internal._background_thread import ( TracingQueueItem, ) diff --git a/python/tests/unit_tests/test_client.py b/python/tests/unit_tests/test_client.py index 381d7d748..feec2c2f6 100644 --- a/python/tests/unit_tests/test_client.py +++ b/python/tests/unit_tests/test_client.py @@ -22,8 +22,6 @@ from unittest.mock import MagicMock, patch import dataclasses_json - -from langsmith._internal import _orjson import pytest import requests from multipart import MultipartParser, MultipartPart, parse_options_header @@ -34,6 +32,7 @@ import langsmith.utils as ls_utils from langsmith import AsyncClient, EvaluationResult, run_trees from langsmith import schemas as ls_schemas +from langsmith._internal import _orjson from langsmith._internal._serde import _serialize_json from langsmith.client import ( Client, diff --git a/python/tests/unit_tests/test_operations.py b/python/tests/unit_tests/test_operations.py index 5ac839861..43d06ebc5 100644 --- a/python/tests/unit_tests/test_operations.py +++ b/python/tests/unit_tests/test_operations.py @@ -1,5 +1,4 @@ from langsmith._internal import _orjson - from langsmith._internal._operations import ( SerializedFeedbackOperation, SerializedRunOperation, From f6197b2461f198cfea47db87fbf6511ed240a252 Mon Sep 17 00:00:00 2001 From: Erick Friis Date: Mon, 18 Nov 2024 13:01:09 -0800 Subject: [PATCH 03/11] x --- python/langsmith/_internal/_orjson.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/langsmith/_internal/_orjson.py b/python/langsmith/_internal/_orjson.py index 20974a0d8..6e75407fb 100644 --- a/python/langsmith/_internal/_orjson.py +++ b/python/langsmith/_internal/_orjson.py @@ -29,17 +29,17 @@ def __init__(self, payloadb: bytes): def dumps( obj: Any, - *, + /, default: Optional[Callable[[Any], Any]] = None, option: int = 0, ) -> bytes: class CustomEncoder(json.JSONEncoder): - def encode(o: Any) -> str: + def encode(self, o: Any) -> str: if isinstance(o, Fragment): return o.payloadb.decode("utf-8") return super().encode(o) - def default(o: Any) -> Any: + def default(self, o: Any) -> Any: if default is not None: return default(o) # TODO: handle OPT_ keys @@ -47,7 +47,7 @@ def default(o: Any) -> Any: return json.dumps(obj, cls=CustomEncoder).encode("utf-8") - def loads(payload: bytes) -> Any: + def loads(payload: bytes, /) -> Any: return json.loads(payload) From fba962048612eebc2f759aeb915d34e6f1be1dfc Mon Sep 17 00:00:00 2001 From: Erick Friis Date: Mon, 18 Nov 2024 13:09:27 -0800 Subject: [PATCH 04/11] x --- python/poetry.lock | 4 ++-- python/pyproject.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/python/poetry.lock b/python/poetry.lock index 2b362f986..915693219 100644 --- a/python/poetry.lock +++ b/python/poetry.lock @@ -873,7 +873,7 @@ datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"] name = "orjson" version = "3.10.11" description = "Fast, correct Python JSON library supporting dataclasses, datetimes, and numpy" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "orjson-3.10.11-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:6dade64687f2bd7c090281652fe18f1151292d567a9302b34c2dbb92a3872f1f"}, @@ -2070,4 +2070,4 @@ vcr = [] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0" -content-hash = "a5a6c61cba1b5ce9cf739700a780c2df63ff7aaa482c29de9910418263318586" +content-hash = "af3b8bc4b04b9e64b98adba7ac3528e34e40502ad721298e4ecc559edc89d252" diff --git a/python/pyproject.toml b/python/pyproject.toml index 0278d6ddc..f401a8789 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -31,7 +31,7 @@ pydantic = [ { version = "^2.7.4", python = ">=3.12.4" }, ] requests = "^2" -orjson = { version = "^3.9.14", markers = "platform_python_implementation != 'PyPy'" } +orjson = { version = "^3.9.14", markers = "platform_python_implementation != 'PyPy' and extra != 'no-orjson'" } httpx = ">=0.23.0,<1" requests-toolbelt = "^1.0.0" From 6a706f2cb059223b5b1a477d605785bb1c1603a2 Mon Sep 17 00:00:00 2001 From: Erick Friis Date: Mon, 18 Nov 2024 13:13:18 -0800 Subject: [PATCH 05/11] x --- python/poetry.lock | 3 ++- python/pyproject.toml | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/python/poetry.lock b/python/poetry.lock index 915693219..493df48e4 100644 --- a/python/poetry.lock +++ b/python/poetry.lock @@ -2065,9 +2065,10 @@ multidict = ">=4.0" propcache = ">=0.2.0" [extras] +defaultjson = [] vcr = [] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0" -content-hash = "af3b8bc4b04b9e64b98adba7ac3528e34e40502ad721298e4ecc559edc89d252" +content-hash = "2af050c1fb5b7ba121c731253c044d85a276e8f77e76b30de1cc531eda60b53d" diff --git a/python/pyproject.toml b/python/pyproject.toml index f401a8789..8a6236e6b 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -31,7 +31,7 @@ pydantic = [ { version = "^2.7.4", python = ">=3.12.4" }, ] requests = "^2" -orjson = { version = "^3.9.14", markers = "platform_python_implementation != 'PyPy' and extra != 'no-orjson'" } +orjson = { version = "^3.9.14", markers = "platform_python_implementation != 'PyPy' and extra != 'defaultjson'" } httpx = ">=0.23.0,<1" requests-toolbelt = "^1.0.0" @@ -71,6 +71,7 @@ pytest-socket = "^0.7.0" [tool.poetry.extras] vcr = ["vcrpy"] +defaultjson = [] [build-system] requires = ["poetry-core"] From bdd9f0a4ed5e92f9fd2f0c4497b3f09500b2f7f9 Mon Sep 17 00:00:00 2001 From: Erick Friis Date: Mon, 18 Nov 2024 13:19:01 -0800 Subject: [PATCH 06/11] x --- python/tests/unit_tests/test_operations.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/tests/unit_tests/test_operations.py b/python/tests/unit_tests/test_operations.py index 43d06ebc5..304dd2977 100644 --- a/python/tests/unit_tests/test_operations.py +++ b/python/tests/unit_tests/test_operations.py @@ -20,7 +20,6 @@ def test_combine_serialized_queue_operations(): attachments=None, ), SerializedRunOperation( - operation="patch", id="id1", trace_id="trace_id1", _none=_orjson.dumps({"b": "2"}), From b68a14e6b1e6ab72eb8316b19b4f7053ee934578 Mon Sep 17 00:00:00 2001 From: Erick Friis Date: Mon, 18 Nov 2024 14:03:41 -0800 Subject: [PATCH 07/11] x --- python/langsmith/_internal/_orjson.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/python/langsmith/_internal/_orjson.py b/python/langsmith/_internal/_orjson.py index 6e75407fb..4b62a687d 100644 --- a/python/langsmith/_internal/_orjson.py +++ b/python/langsmith/_internal/_orjson.py @@ -21,22 +21,22 @@ OPT_SERIALIZE_NUMPY = 4 OPT_SERIALIZE_UUID = 8 - class Fragment: + class Fragment: # type: ignore def __init__(self, payloadb: bytes): self.payloadb = payloadb - from json import JSONDecodeError + from json import JSONDecodeError # type: ignore - def dumps( + def dumps( # type: ignore obj: Any, /, default: Optional[Callable[[Any], Any]] = None, option: int = 0, - ) -> bytes: - class CustomEncoder(json.JSONEncoder): + ) -> bytes: # type: ignore + class CustomEncoder(json.JSONEncoder): # type: ignore def encode(self, o: Any) -> str: if isinstance(o, Fragment): - return o.payloadb.decode("utf-8") + return o.payloadb.decode("utf-8") # type: ignore return super().encode(o) def default(self, o: Any) -> Any: @@ -47,7 +47,7 @@ def default(self, o: Any) -> Any: return json.dumps(obj, cls=CustomEncoder).encode("utf-8") - def loads(payload: bytes, /) -> Any: + def loads(payload: bytes, /) -> Any: # type: ignore return json.loads(payload) From eca8e9621a9c59ff8f3aca9d7bb6000d651603c0 Mon Sep 17 00:00:00 2001 From: Erick Friis Date: Mon, 18 Nov 2024 14:22:55 -0800 Subject: [PATCH 08/11] x --- python/langsmith/_internal/_orjson.py | 23 +++++++++++++++++++++- python/tests/unit_tests/test_operations.py | 1 + 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/python/langsmith/_internal/_orjson.py b/python/langsmith/_internal/_orjson.py index 4b62a687d..ecd9e20bc 100644 --- a/python/langsmith/_internal/_orjson.py +++ b/python/langsmith/_internal/_orjson.py @@ -13,7 +13,9 @@ ) except ImportError: + import dataclasses import json + import uuid from typing import Any, Callable, Optional OPT_NON_STR_KEYS = 1 @@ -33,6 +35,14 @@ def dumps( # type: ignore default: Optional[Callable[[Any], Any]] = None, option: int = 0, ) -> bytes: # type: ignore + # for now, don't do anything for this case because `json.dumps` + # automatically encodes non-str keys as str by default, unlike orjson + # enable_non_str_keys = bool(option & OPT_NON_STR_KEYS) + + enable_serialize_numpy = bool(option & OPT_SERIALIZE_NUMPY) + enable_serialize_dataclass = bool(option & OPT_SERIALIZE_DATACLASS) + enable_serialize_uuid = bool(option & OPT_SERIALIZE_UUID) + class CustomEncoder(json.JSONEncoder): # type: ignore def encode(self, o: Any) -> str: if isinstance(o, Fragment): @@ -40,9 +50,20 @@ def encode(self, o: Any) -> str: return super().encode(o) def default(self, o: Any) -> Any: + if enable_serialize_uuid and isinstance(o, uuid.UUID): + return str(o) + if enable_serialize_numpy and hasattr(o, "tolist"): + # even objects like np.uint16(15) have a .tolist() function + return o.tolist() + if ( + enable_serialize_dataclass + and dataclasses.is_dataclass(o) + and not isinstance(o, type) + ): + return dataclasses.asdict(o) if default is not None: return default(o) - # TODO: handle OPT_ keys + return super().default(o) return json.dumps(obj, cls=CustomEncoder).encode("utf-8") diff --git a/python/tests/unit_tests/test_operations.py b/python/tests/unit_tests/test_operations.py index 304dd2977..43d06ebc5 100644 --- a/python/tests/unit_tests/test_operations.py +++ b/python/tests/unit_tests/test_operations.py @@ -20,6 +20,7 @@ def test_combine_serialized_queue_operations(): attachments=None, ), SerializedRunOperation( + operation="patch", id="id1", trace_id="trace_id1", _none=_orjson.dumps({"b": "2"}), From cb2998cb07c493b00aa9e142a3b4150fb90171f0 Mon Sep 17 00:00:00 2001 From: Erick Friis Date: Mon, 18 Nov 2024 15:34:57 -0800 Subject: [PATCH 09/11] x --- .../langsmith/_internal/_background_thread.py | 23 ++++++++++++++----- python/langsmith/client.py | 3 +++ python/poetry.lock | 5 ++-- python/pyproject.toml | 3 +-- 4 files changed, 23 insertions(+), 11 deletions(-) diff --git a/python/langsmith/_internal/_background_thread.py b/python/langsmith/_internal/_background_thread.py index b6aee1f4e..9a3888f2b 100644 --- a/python/langsmith/_internal/_background_thread.py +++ b/python/langsmith/_internal/_background_thread.py @@ -155,13 +155,24 @@ def tracing_control_thread_func(client_ref: weakref.ref[Client]) -> None: # 1 for this func, 1 for getrefcount, 1 for _get_data_type_cached num_known_refs = 3 + def keep_thread_active() -> bool: + # if `client.cleanup()` was called, stop thread + if client and client._manual_cleanup: + return False + if not threading.main_thread().is_alive(): + # main thread is dead. should not be active + return False + try: + # check if client refs count indicates we're the only remaining + # reference to the client + return sys.getrefcount(client) > num_known_refs + len(sub_threads) + except AttributeError: + # in PyPy, there is no sys.getrefcount attribute + # for now, keep thread alive + return True + # loop until - while ( - # the main thread dies - threading.main_thread().is_alive() - # or we're the only remaining reference to the client - and sys.getrefcount(client) > num_known_refs + len(sub_threads) - ): + while keep_thread_active(): for thread in sub_threads: if not thread.is_alive(): sub_threads.remove(thread) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index 65ad8a159..29601881a 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -368,6 +368,7 @@ class Client: "_info", "_write_api_urls", "_settings", + "_manual_cleanup", ] def __init__( @@ -516,6 +517,8 @@ def __init__( self._settings: Union[ls_schemas.LangSmithSettings, None] = None + self._manual_cleanup = False + def _repr_html_(self) -> str: """Return an HTML representation of the instance with a link to the URL. diff --git a/python/poetry.lock b/python/poetry.lock index 493df48e4..2b362f986 100644 --- a/python/poetry.lock +++ b/python/poetry.lock @@ -873,7 +873,7 @@ datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"] name = "orjson" version = "3.10.11" description = "Fast, correct Python JSON library supporting dataclasses, datetimes, and numpy" -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "orjson-3.10.11-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:6dade64687f2bd7c090281652fe18f1151292d567a9302b34c2dbb92a3872f1f"}, @@ -2065,10 +2065,9 @@ multidict = ">=4.0" propcache = ">=0.2.0" [extras] -defaultjson = [] vcr = [] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0" -content-hash = "2af050c1fb5b7ba121c731253c044d85a276e8f77e76b30de1cc531eda60b53d" +content-hash = "a5a6c61cba1b5ce9cf739700a780c2df63ff7aaa482c29de9910418263318586" diff --git a/python/pyproject.toml b/python/pyproject.toml index 8a6236e6b..0278d6ddc 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -31,7 +31,7 @@ pydantic = [ { version = "^2.7.4", python = ">=3.12.4" }, ] requests = "^2" -orjson = { version = "^3.9.14", markers = "platform_python_implementation != 'PyPy' and extra != 'defaultjson'" } +orjson = { version = "^3.9.14", markers = "platform_python_implementation != 'PyPy'" } httpx = ">=0.23.0,<1" requests-toolbelt = "^1.0.0" @@ -71,7 +71,6 @@ pytest-socket = "^0.7.0" [tool.poetry.extras] vcr = ["vcrpy"] -defaultjson = [] [build-system] requires = ["poetry-core"] From 15de54988ee911bb99a5225d1e150b6e05b6e915 Mon Sep 17 00:00:00 2001 From: Erick Friis Date: Mon, 18 Nov 2024 15:36:03 -0800 Subject: [PATCH 10/11] x --- python/langsmith/client.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index 29601881a..8348b57d1 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -5678,6 +5678,10 @@ def push_prompt( ) return url + def cleanup(self) -> None: + """Manually trigger cleanup of the background thread.""" + self._manual_cleanup = True + def convert_prompt_to_openai_format( messages: Any, From c9f76c76085fc95522c1c2b3c84a07a81900b14f Mon Sep 17 00:00:00 2001 From: Erick Friis Date: Mon, 18 Nov 2024 16:14:05 -0800 Subject: [PATCH 11/11] x --- python/langsmith/_internal/_background_thread.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/langsmith/_internal/_background_thread.py b/python/langsmith/_internal/_background_thread.py index 9a3888f2b..844851996 100644 --- a/python/langsmith/_internal/_background_thread.py +++ b/python/langsmith/_internal/_background_thread.py @@ -162,11 +162,12 @@ def keep_thread_active() -> bool: if not threading.main_thread().is_alive(): # main thread is dead. should not be active return False - try: + + if hasattr(sys, "getrefcount"): # check if client refs count indicates we're the only remaining # reference to the client return sys.getrefcount(client) > num_known_refs + len(sub_threads) - except AttributeError: + else: # in PyPy, there is no sys.getrefcount attribute # for now, keep thread alive return True