diff --git a/.stats.yml b/.stats.yml
index e550e0bd..19e9daeb 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,2 +1,2 @@
-configured_endpoints: 10
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/anthropic-73443ebfebee64b8ec0ebbacd2521d6b6aa900e9526ec97abdcbcff0c0955d9b.yml
+configured_endpoints: 19
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/anthropic-be055148d227480fcacc9086c37ac8009dcb487731069ada51af35044f65bee4.yml
diff --git a/api.md b/api.md
index 1ebd122d..7669870a 100644
--- a/api.md
+++ b/api.md
@@ -1,14 +1,35 @@
+# Shared Types
+
+```python
+from anthropic.types import (
+ APIErrorObject,
+ AuthenticationError,
+ BillingError,
+ ErrorObject,
+ ErrorResponse,
+ GatewayTimeoutError,
+ InvalidRequestError,
+ NotFoundError,
+ OverloadedError,
+ PermissionError,
+ RateLimitError,
+)
+```
+
# Messages
Types:
```python
from anthropic.types import (
+ Base64PDFSource,
+ CacheControlEphemeral,
ContentBlock,
ContentBlockDeltaEvent,
ContentBlockParam,
ContentBlockStartEvent,
ContentBlockStopEvent,
+ DocumentBlockParam,
ImageBlockParam,
InputJSONDelta,
Message,
@@ -18,6 +39,7 @@ from anthropic.types import (
MessageStartEvent,
MessageStopEvent,
MessageStreamEvent,
+ MessageTokensCount,
Metadata,
Model,
RawContentBlockDeltaEvent,
@@ -44,8 +66,47 @@ from anthropic.types import (
Methods:
-- client.messages.create(\*\*params) -> Message
+- client.messages.create(\*\*params) -> Message
- client.messages.stream(\*args) -> MessageStreamManager[MessageStream] | MessageStreamManager[MessageStreamT]
+- client.messages.count_tokens(\*\*params) -> MessageTokensCount
+
+## Batches
+
+Types:
+
+```python
+from anthropic.types.messages import (
+ MessageBatch,
+ MessageBatchCanceledResult,
+ MessageBatchErroredResult,
+ MessageBatchExpiredResult,
+ MessageBatchIndividualResponse,
+ MessageBatchRequestCounts,
+ MessageBatchResult,
+ MessageBatchSucceededResult,
+)
+```
+
+Methods:
+
+- client.messages.batches.create(\*\*params) -> MessageBatch
+- client.messages.batches.retrieve(message_batch_id) -> MessageBatch
+- client.messages.batches.list(\*\*params) -> SyncPage[MessageBatch]
+- client.messages.batches.cancel(message_batch_id) -> MessageBatch
+- client.messages.batches.results(message_batch_id) -> BinaryAPIResponse
+
+# Models
+
+Types:
+
+```python
+from anthropic.types import ModelInfo
+```
+
+Methods:
+
+- client.models.retrieve(model_id) -> ModelInfo
+- client.models.list(\*\*params) -> SyncPage[ModelInfo]
# Beta
@@ -56,8 +117,10 @@ from anthropic.types import (
AnthropicBeta,
BetaAPIError,
BetaAuthenticationError,
+ BetaBillingError,
BetaError,
BetaErrorResponse,
+ BetaGatewayTimeoutError,
BetaInvalidRequestError,
BetaNotFoundError,
BetaOverloadedError,
@@ -66,6 +129,19 @@ from anthropic.types import (
)
```
+## Models
+
+Types:
+
+```python
+from anthropic.types.beta import BetaModelInfo
+```
+
+Methods:
+
+- client.beta.models.retrieve(model_id) -> BetaModelInfo
+- client.beta.models.list(\*\*params) -> SyncPage[BetaModelInfo]
+
## Messages
Types:
@@ -139,30 +215,3 @@ Methods:
- client.beta.messages.batches.list(\*\*params) -> SyncPage[BetaMessageBatch]
- client.beta.messages.batches.cancel(message_batch_id) -> BetaMessageBatch
- client.beta.messages.batches.results(message_batch_id) -> BinaryAPIResponse
-
-## PromptCaching
-
-### Messages
-
-Types:
-
-```python
-from anthropic.types.beta.prompt_caching import (
- PromptCachingBetaCacheControlEphemeral,
- PromptCachingBetaImageBlockParam,
- PromptCachingBetaMessage,
- PromptCachingBetaMessageParam,
- PromptCachingBetaTextBlockParam,
- PromptCachingBetaTool,
- PromptCachingBetaToolResultBlockParam,
- PromptCachingBetaToolUseBlockParam,
- PromptCachingBetaUsage,
- RawPromptCachingBetaMessageStartEvent,
- RawPromptCachingBetaMessageStreamEvent,
-)
-```
-
-Methods:
-
-- client.beta.prompt_caching.messages.create(\*\*params) -> PromptCachingBetaMessage
-- client.beta.prompt_caching.messages.stream(\*\*params) -> PromptCachingBetaMessageStreamManager
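The api.md changes above document the new public surface: token counting, Message Batches, and the Models API. A minimal usage sketch, assuming `ANTHROPIC_API_KEY` is set in the environment; the model ID and batch ID are placeholders, and field names such as `input_tokens` and `processing_status` are assumptions based on the listed response types rather than anything shown in this diff.

```python
from anthropic import Anthropic

client = Anthropic()  # assumes ANTHROPIC_API_KEY is set in the environment

# Token counting: client.messages.count_tokens(**params) -> MessageTokensCount
count = client.messages.count_tokens(
    model="claude-3-5-sonnet-latest",  # placeholder model ID
    messages=[{"role": "user", "content": "Hello, Claude"}],
)
print(count.input_tokens)  # assumed field on MessageTokensCount

# Models API: client.models.list(**params) -> SyncPage[ModelInfo]
for model in client.models.list():
    print(model.id)

# Message Batches: client.messages.batches.retrieve(message_batch_id) -> MessageBatch
batch = client.messages.batches.retrieve("msgbatch_...")  # placeholder batch ID
print(batch.processing_status)  # assumed field on MessageBatch
```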
diff --git a/src/anthropic/_client.py b/src/anthropic/_client.py
index e2eb27c4..8bf77861 100644
--- a/src/anthropic/_client.py
+++ b/src/anthropic/_client.py
@@ -25,7 +25,7 @@
get_async_library,
)
from ._version import __version__
-from .resources import messages, completions
+from .resources import models, completions
from ._streaming import Stream as Stream, AsyncStream as AsyncStream
from ._exceptions import APIStatusError
from ._base_client import (
@@ -34,6 +34,7 @@
AsyncAPIClient,
)
from .resources.beta import beta
+from .resources.messages import messages
__all__ = [
"Timeout",
@@ -50,6 +51,7 @@
class Anthropic(SyncAPIClient):
completions: completions.Completions
messages: messages.Messages
+ models: models.Models
beta: beta.Beta
with_raw_response: AnthropicWithRawResponse
with_streaming_response: AnthropicWithStreamedResponse
@@ -120,6 +122,7 @@ def __init__(
self.completions = completions.Completions(self)
self.messages = messages.Messages(self)
+ self.models = models.Models(self)
self.beta = beta.Beta(self)
self.with_raw_response = AnthropicWithRawResponse(self)
self.with_streaming_response = AnthropicWithStreamedResponse(self)
@@ -268,6 +271,7 @@ def _make_status_error(
class AsyncAnthropic(AsyncAPIClient):
completions: completions.AsyncCompletions
messages: messages.AsyncMessages
+ models: models.AsyncModels
beta: beta.AsyncBeta
with_raw_response: AsyncAnthropicWithRawResponse
with_streaming_response: AsyncAnthropicWithStreamedResponse
@@ -338,6 +342,7 @@ def __init__(
self.completions = completions.AsyncCompletions(self)
self.messages = messages.AsyncMessages(self)
+ self.models = models.AsyncModels(self)
self.beta = beta.AsyncBeta(self)
self.with_raw_response = AsyncAnthropicWithRawResponse(self)
self.with_streaming_response = AsyncAnthropicWithStreamedResponse(self)
@@ -487,6 +492,7 @@ class AnthropicWithRawResponse:
def __init__(self, client: Anthropic) -> None:
self.completions = completions.CompletionsWithRawResponse(client.completions)
self.messages = messages.MessagesWithRawResponse(client.messages)
+ self.models = models.ModelsWithRawResponse(client.models)
self.beta = beta.BetaWithRawResponse(client.beta)
@@ -494,6 +500,7 @@ class AsyncAnthropicWithRawResponse:
def __init__(self, client: AsyncAnthropic) -> None:
self.completions = completions.AsyncCompletionsWithRawResponse(client.completions)
self.messages = messages.AsyncMessagesWithRawResponse(client.messages)
+ self.models = models.AsyncModelsWithRawResponse(client.models)
self.beta = beta.AsyncBetaWithRawResponse(client.beta)
@@ -501,6 +508,7 @@ class AnthropicWithStreamedResponse:
def __init__(self, client: Anthropic) -> None:
self.completions = completions.CompletionsWithStreamingResponse(client.completions)
self.messages = messages.MessagesWithStreamingResponse(client.messages)
+ self.models = models.ModelsWithStreamingResponse(client.models)
self.beta = beta.BetaWithStreamingResponse(client.beta)
@@ -508,6 +516,7 @@ class AsyncAnthropicWithStreamedResponse:
def __init__(self, client: AsyncAnthropic) -> None:
self.completions = completions.AsyncCompletionsWithStreamingResponse(client.completions)
self.messages = messages.AsyncMessagesWithStreamingResponse(client.messages)
+ self.models = models.AsyncModelsWithStreamingResponse(client.models)
self.beta = beta.AsyncBetaWithStreamingResponse(client.beta)
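The `_client.py` changes wire the new `models` resource into both the sync and async clients and into their raw/streaming response wrappers. A rough sketch of what that enables, assuming the top-level `Models` resource mirrors the beta implementation shown later in this diff; the `request-id` header name is an assumption.

```python
import asyncio

from anthropic import Anthropic, AsyncAnthropic

client = Anthropic()

# Raw-response access goes through the wrapper classes registered above.
raw = client.with_raw_response.models.list()
print(raw.headers.get("request-id"))  # assumed header name
page = raw.parse()  # parse back into SyncPage[ModelInfo]

# The async client exposes the same resource.
async def main() -> None:
    async_client = AsyncAnthropic()
    async for model in async_client.models.list():
        print(model.id)

asyncio.run(main())
```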
diff --git a/src/anthropic/lib/streaming/__init__.py b/src/anthropic/lib/streaming/__init__.py
index fbd25b02..0ab41209 100644
--- a/src/anthropic/lib/streaming/__init__.py
+++ b/src/anthropic/lib/streaming/__init__.py
@@ -11,9 +11,3 @@
MessageStreamManager as MessageStreamManager,
AsyncMessageStreamManager as AsyncMessageStreamManager,
)
-from ._prompt_caching_beta_messages import (
- PromptCachingBetaMessageStream as PromptCachingBetaMessageStream,
- AsyncPromptCachingBetaMessageStream as AsyncPromptCachingBetaMessageStream,
- PromptCachingBetaMessageStreamManager as PromptCachingBetaMessageStreamManager,
- AsyncPromptCachingBetaMessageStreamManager as AsyncPromptCachingBetaMessageStreamManager,
-)
diff --git a/src/anthropic/lib/streaming/_prompt_caching_beta_messages.py b/src/anthropic/lib/streaming/_prompt_caching_beta_messages.py
deleted file mode 100644
index df727ea8..00000000
--- a/src/anthropic/lib/streaming/_prompt_caching_beta_messages.py
+++ /dev/null
@@ -1,423 +0,0 @@
-from __future__ import annotations
-
-from types import TracebackType
-from typing import TYPE_CHECKING, Any, Callable, cast
-from typing_extensions import Self, Iterator, Awaitable, AsyncIterator, assert_never
-
-import httpx
-
-from ...types import ContentBlock
-from ..._utils import consume_sync_iterator, consume_async_iterator
-from ..._models import build, construct_type
-from ..._streaming import Stream, AsyncStream
-from ._prompt_caching_beta_types import (
- TextEvent,
- InputJsonEvent,
- MessageStopEvent,
- ContentBlockStopEvent,
- PromptCachingBetaMessageStreamEvent,
-)
-from ...types.beta.prompt_caching import PromptCachingBetaMessage, RawPromptCachingBetaMessageStreamEvent
-
-if TYPE_CHECKING:
- from ..._client import Anthropic, AsyncAnthropic
-
-
-class PromptCachingBetaMessageStream:
- text_stream: Iterator[str]
- """Iterator over just the text deltas in the stream.
-
- ```py
- for text in stream.text_stream:
- print(text, end="", flush=True)
- print()
- ```
- """
-
- response: httpx.Response
-
- def __init__(
- self,
- *,
- cast_to: type[RawPromptCachingBetaMessageStreamEvent],
- response: httpx.Response,
- client: Anthropic,
- ) -> None:
- self.response = response
- self._cast_to = cast_to
- self._client = client
-
- self.text_stream = self.__stream_text__()
- self.__final_message_snapshot: PromptCachingBetaMessage | None = None
-
- self._iterator = self.__stream__()
- self._raw_stream: Stream[RawPromptCachingBetaMessageStreamEvent] = Stream(
- cast_to=cast_to, response=response, client=client
- )
-
- def __next__(self) -> PromptCachingBetaMessageStreamEvent:
- return self._iterator.__next__()
-
- def __iter__(self) -> Iterator[PromptCachingBetaMessageStreamEvent]:
- for item in self._iterator:
- yield item
-
- def __enter__(self) -> Self:
- return self
-
- def __exit__(
- self,
- exc_type: type[BaseException] | None,
- exc: BaseException | None,
- exc_tb: TracebackType | None,
- ) -> None:
- self.close()
-
- def close(self) -> None:
- """
- Close the response and release the connection.
-
- Automatically called if the response body is read to completion.
- """
- self.response.close()
-
- def get_final_message(self) -> PromptCachingBetaMessage:
- """Waits until the stream has been read to completion and returns
- the accumulated `PromptCachingBetaMessage` object.
- """
- self.until_done()
- assert self.__final_message_snapshot is not None
- return self.__final_message_snapshot
-
- def get_final_text(self) -> str:
- """Returns all `text` content blocks concatenated together.
-
- > [!NOTE]
- > Currently the API will only respond with a single content block.
-
- Will raise an error if no `text` content blocks were returned.
- """
- message = self.get_final_message()
- text_blocks: list[str] = []
- for block in message.content:
- if block.type == "text":
- text_blocks.append(block.text)
-
- if not text_blocks:
- raise RuntimeError("Expected to have received at least 1 text block")
-
- return "".join(text_blocks)
-
- def until_done(self) -> None:
- """Blocks until the stream has been consumed"""
- consume_sync_iterator(self)
-
- # properties
- @property
- def current_message_snapshot(self) -> PromptCachingBetaMessage:
- assert self.__final_message_snapshot is not None
- return self.__final_message_snapshot
-
- def __stream__(self) -> Iterator[PromptCachingBetaMessageStreamEvent]:
- for sse_event in self._raw_stream:
- self.__final_message_snapshot = accumulate_event(
- event=sse_event,
- current_snapshot=self.__final_message_snapshot,
- )
-
- events_to_fire = build_events(event=sse_event, message_snapshot=self.current_message_snapshot)
- for event in events_to_fire:
- yield event
-
- def __stream_text__(self) -> Iterator[str]:
- for chunk in self:
- if chunk.type == "content_block_delta" and chunk.delta.type == "text_delta":
- yield chunk.delta.text
-
-
-class PromptCachingBetaMessageStreamManager:
- """Wrapper over PromptCachingBetaMessageStream that is returned by `.stream()`.
-
- ```py
- with client.beta.prompt_caching.messages.stream(...) as stream:
- for chunk in stream:
- ...
- ```
- """
-
- def __init__(
- self,
- api_request: Callable[[], Stream[RawPromptCachingBetaMessageStreamEvent]],
- ) -> None:
- self.__stream: PromptCachingBetaMessageStream | None = None
- self.__api_request = api_request
-
- def __enter__(self) -> PromptCachingBetaMessageStream:
- raw_stream = self.__api_request()
-
- self.__stream = PromptCachingBetaMessageStream(
- cast_to=raw_stream._cast_to,
- response=raw_stream.response,
- client=raw_stream._client,
- )
-
- return self.__stream
-
- def __exit__(
- self,
- exc_type: type[BaseException] | None,
- exc: BaseException | None,
- exc_tb: TracebackType | None,
- ) -> None:
- if self.__stream is not None:
- self.__stream.close()
-
-
-class AsyncPromptCachingBetaMessageStream:
- text_stream: AsyncIterator[str]
- """Async iterator over just the text deltas in the stream.
-
- ```py
- async for text in stream.text_stream:
- print(text, end="", flush=True)
- print()
- ```
- """
-
- response: httpx.Response
-
- def __init__(
- self,
- *,
- cast_to: type[RawPromptCachingBetaMessageStreamEvent],
- response: httpx.Response,
- client: AsyncAnthropic,
- ) -> None:
- self.response = response
- self._cast_to = cast_to
- self._client = client
-
- self.text_stream = self.__stream_text__()
- self.__final_message_snapshot: PromptCachingBetaMessage | None = None
-
- self._iterator = self.__stream__()
- self._raw_stream: AsyncStream[RawPromptCachingBetaMessageStreamEvent] = AsyncStream(
- cast_to=cast_to, response=response, client=client
- )
-
- async def __anext__(self) -> PromptCachingBetaMessageStreamEvent:
- return await self._iterator.__anext__()
-
- async def __aiter__(self) -> AsyncIterator[PromptCachingBetaMessageStreamEvent]:
- async for item in self._iterator:
- yield item
-
- async def __aenter__(self) -> Self:
- return self
-
- async def __aexit__(
- self,
- exc_type: type[BaseException] | None,
- exc: BaseException | None,
- exc_tb: TracebackType | None,
- ) -> None:
- await self.close()
-
- async def close(self) -> None:
- """
- Close the response and release the connection.
-
- Automatically called if the response body is read to completion.
- """
- await self.response.aclose()
-
- async def get_final_message(self) -> PromptCachingBetaMessage:
- """Waits until the stream has been read to completion and returns
- the accumulated `PromptCachingBetaMessage` object.
- """
- await self.until_done()
- assert self.__final_message_snapshot is not None
- return self.__final_message_snapshot
-
- async def get_final_text(self) -> str:
- """Returns all `text` content blocks concatenated together.
-
- > [!NOTE]
- > Currently the API will only respond with a single content block.
-
- Will raise an error if no `text` content blocks were returned.
- """
- message = await self.get_final_message()
- text_blocks: list[str] = []
- for block in message.content:
- if block.type == "text":
- text_blocks.append(block.text)
-
- if not text_blocks:
- raise RuntimeError("Expected to have received at least 1 text block")
-
- return "".join(text_blocks)
-
- async def until_done(self) -> None:
- """Waits until the stream has been consumed"""
- await consume_async_iterator(self)
-
- # properties
- @property
- def current_message_snapshot(self) -> PromptCachingBetaMessage:
- assert self.__final_message_snapshot is not None
- return self.__final_message_snapshot
-
- async def __stream__(self) -> AsyncIterator[PromptCachingBetaMessageStreamEvent]:
- async for sse_event in self._raw_stream:
- self.__final_message_snapshot = accumulate_event(
- event=sse_event,
- current_snapshot=self.__final_message_snapshot,
- )
-
- events_to_fire = build_events(event=sse_event, message_snapshot=self.current_message_snapshot)
- for event in events_to_fire:
- yield event
-
- async def __stream_text__(self) -> AsyncIterator[str]:
- async for chunk in self:
- if chunk.type == "content_block_delta" and chunk.delta.type == "text_delta":
- yield chunk.delta.text
-
-
-class AsyncPromptCachingBetaMessageStreamManager:
- """Wrapper over AsyncMessageStream that is returned by `.stream()`
- so that an async context manager can be used without `await`ing the
- original client call.
-
- ```py
- async with client.messages.stream(...) as stream:
- async for chunk in stream:
- ...
- ```
- """
-
- def __init__(
- self,
- api_request: Awaitable[AsyncStream[RawPromptCachingBetaMessageStreamEvent]],
- ) -> None:
- self.__stream: AsyncPromptCachingBetaMessageStream | None = None
- self.__api_request = api_request
-
- async def __aenter__(self) -> AsyncPromptCachingBetaMessageStream:
- raw_stream = await self.__api_request
-
- self.__stream = AsyncPromptCachingBetaMessageStream(
- cast_to=raw_stream._cast_to,
- response=raw_stream.response,
- client=raw_stream._client,
- )
-
- return self.__stream
-
- async def __aexit__(
- self,
- exc_type: type[BaseException] | None,
- exc: BaseException | None,
- exc_tb: TracebackType | None,
- ) -> None:
- if self.__stream is not None:
- await self.__stream.close()
-
-
-def build_events(
- *,
- event: RawPromptCachingBetaMessageStreamEvent,
- message_snapshot: PromptCachingBetaMessage,
-) -> list[PromptCachingBetaMessageStreamEvent]:
- events_to_fire: list[PromptCachingBetaMessageStreamEvent] = []
-
- if event.type == "message_start":
- events_to_fire.append(event)
- elif event.type == "message_delta":
- events_to_fire.append(event)
- elif event.type == "message_stop":
- events_to_fire.append(build(MessageStopEvent, type="message_stop", message=message_snapshot))
- elif event.type == "content_block_start":
- events_to_fire.append(event)
- elif event.type == "content_block_delta":
- events_to_fire.append(event)
-
- content_block = message_snapshot.content[event.index]
- if event.delta.type == "text_delta" and content_block.type == "text":
- events_to_fire.append(
- build(
- TextEvent,
- type="text",
- text=event.delta.text,
- snapshot=content_block.text,
- )
- )
- elif event.delta.type == "input_json_delta" and content_block.type == "tool_use":
- events_to_fire.append(
- build(
- InputJsonEvent,
- type="input_json",
- partial_json=event.delta.partial_json,
- snapshot=content_block.input,
- )
- )
- elif event.type == "content_block_stop":
- content_block = message_snapshot.content[event.index]
-
- events_to_fire.append(
- build(ContentBlockStopEvent, type="content_block_stop", index=event.index, content_block=content_block),
- )
- else:
- # we only want exhaustive checking for linters, not at runtime
- if TYPE_CHECKING: # type: ignore[unreachable]
- assert_never(event)
-
- return events_to_fire
-
-
-JSON_BUF_PROPERTY = "__json_buf"
-
-
-def accumulate_event(
- *,
- event: RawPromptCachingBetaMessageStreamEvent,
- current_snapshot: PromptCachingBetaMessage | None,
-) -> PromptCachingBetaMessage:
- if current_snapshot is None:
- if event.type == "message_start":
- return PromptCachingBetaMessage.construct(**cast(Any, event.message.to_dict()))
-
- raise RuntimeError(f'Unexpected event order, got {event.type} before "message_start"')
-
- if event.type == "content_block_start":
- # TODO: check index
- current_snapshot.content.append(
- cast(
- ContentBlock,
- construct_type(type_=ContentBlock, value=event.content_block.model_dump()),
- ),
- )
- elif event.type == "content_block_delta":
- content = current_snapshot.content[event.index]
- if content.type == "text" and event.delta.type == "text_delta":
- content.text += event.delta.text
- elif content.type == "tool_use" and event.delta.type == "input_json_delta":
- from jiter import from_json
-
- # we need to keep track of the raw JSON string as well so that we can
- # re-parse it for each delta, for now we just store it as an untyped
- # property on the snapshot
- json_buf = cast(bytes, getattr(content, JSON_BUF_PROPERTY, b""))
- json_buf += bytes(event.delta.partial_json, "utf-8")
-
- if json_buf:
- content.input = from_json(json_buf, partial_mode=True)
-
- setattr(content, JSON_BUF_PROPERTY, json_buf)
- elif event.type == "message_delta":
- current_snapshot.stop_reason = event.delta.stop_reason
- current_snapshot.stop_sequence = event.delta.stop_sequence
- current_snapshot.usage.output_tokens = event.usage.output_tokens
-
- return current_snapshot
diff --git a/src/anthropic/lib/streaming/_prompt_caching_beta_types.py b/src/anthropic/lib/streaming/_prompt_caching_beta_types.py
deleted file mode 100644
index d8fdce52..00000000
--- a/src/anthropic/lib/streaming/_prompt_caching_beta_types.py
+++ /dev/null
@@ -1,32 +0,0 @@
-from typing import Union
-from typing_extensions import Literal
-
-from ._types import (
- TextEvent,
- InputJsonEvent,
- RawMessageDeltaEvent,
- ContentBlockStopEvent,
- RawContentBlockDeltaEvent,
- RawContentBlockStartEvent,
-)
-from ...types import RawMessageStopEvent
-from ...types.beta.prompt_caching import PromptCachingBetaMessage, RawPromptCachingBetaMessageStartEvent
-
-
-class MessageStopEvent(RawMessageStopEvent):
- type: Literal["message_stop"]
-
- message: PromptCachingBetaMessage
-
-
-PromptCachingBetaMessageStreamEvent = Union[
- RawPromptCachingBetaMessageStartEvent,
- MessageStopEvent,
- # same as non-beta
- TextEvent,
- InputJsonEvent,
- RawMessageDeltaEvent,
- RawContentBlockStartEvent,
- RawContentBlockDeltaEvent,
- ContentBlockStopEvent,
-]
diff --git a/src/anthropic/resources/__init__.py b/src/anthropic/resources/__init__.py
index 318d5cdd..ffff8855 100644
--- a/src/anthropic/resources/__init__.py
+++ b/src/anthropic/resources/__init__.py
@@ -8,6 +8,14 @@
BetaWithStreamingResponse,
AsyncBetaWithStreamingResponse,
)
+from .models import (
+ Models,
+ AsyncModels,
+ ModelsWithRawResponse,
+ AsyncModelsWithRawResponse,
+ ModelsWithStreamingResponse,
+ AsyncModelsWithStreamingResponse,
+)
from .messages import (
Messages,
AsyncMessages,
@@ -38,6 +46,12 @@
"AsyncMessagesWithRawResponse",
"MessagesWithStreamingResponse",
"AsyncMessagesWithStreamingResponse",
+ "Models",
+ "AsyncModels",
+ "ModelsWithRawResponse",
+ "AsyncModelsWithRawResponse",
+ "ModelsWithStreamingResponse",
+ "AsyncModelsWithStreamingResponse",
"Beta",
"AsyncBeta",
"BetaWithRawResponse",
diff --git a/src/anthropic/resources/beta/__init__.py b/src/anthropic/resources/beta/__init__.py
index d06a0802..82b343fa 100644
--- a/src/anthropic/resources/beta/__init__.py
+++ b/src/anthropic/resources/beta/__init__.py
@@ -8,6 +8,14 @@
BetaWithStreamingResponse,
AsyncBetaWithStreamingResponse,
)
+from .models import (
+ Models,
+ AsyncModels,
+ ModelsWithRawResponse,
+ AsyncModelsWithRawResponse,
+ ModelsWithStreamingResponse,
+ AsyncModelsWithStreamingResponse,
+)
from .messages import (
Messages,
AsyncMessages,
@@ -16,28 +24,20 @@
MessagesWithStreamingResponse,
AsyncMessagesWithStreamingResponse,
)
-from .prompt_caching import (
- PromptCaching,
- AsyncPromptCaching,
- PromptCachingWithRawResponse,
- AsyncPromptCachingWithRawResponse,
- PromptCachingWithStreamingResponse,
- AsyncPromptCachingWithStreamingResponse,
-)
__all__ = [
+ "Models",
+ "AsyncModels",
+ "ModelsWithRawResponse",
+ "AsyncModelsWithRawResponse",
+ "ModelsWithStreamingResponse",
+ "AsyncModelsWithStreamingResponse",
"Messages",
"AsyncMessages",
"MessagesWithRawResponse",
"AsyncMessagesWithRawResponse",
"MessagesWithStreamingResponse",
"AsyncMessagesWithStreamingResponse",
- "PromptCaching",
- "AsyncPromptCaching",
- "PromptCachingWithRawResponse",
- "AsyncPromptCachingWithRawResponse",
- "PromptCachingWithStreamingResponse",
- "AsyncPromptCachingWithStreamingResponse",
"Beta",
"AsyncBeta",
"BetaWithRawResponse",
diff --git a/src/anthropic/resources/beta/beta.py b/src/anthropic/resources/beta/beta.py
index fbff30fa..8293782d 100644
--- a/src/anthropic/resources/beta/beta.py
+++ b/src/anthropic/resources/beta/beta.py
@@ -2,6 +2,14 @@
from __future__ import annotations
+from .models import (
+ Models,
+ AsyncModels,
+ ModelsWithRawResponse,
+ AsyncModelsWithRawResponse,
+ ModelsWithStreamingResponse,
+ AsyncModelsWithStreamingResponse,
+)
from ..._compat import cached_property
from ..._resource import SyncAPIResource, AsyncAPIResource
from .messages.messages import (
@@ -12,26 +20,18 @@
MessagesWithStreamingResponse,
AsyncMessagesWithStreamingResponse,
)
-from .prompt_caching.prompt_caching import (
- PromptCaching,
- AsyncPromptCaching,
- PromptCachingWithRawResponse,
- AsyncPromptCachingWithRawResponse,
- PromptCachingWithStreamingResponse,
- AsyncPromptCachingWithStreamingResponse,
-)
__all__ = ["Beta", "AsyncBeta"]
class Beta(SyncAPIResource):
@cached_property
- def messages(self) -> Messages:
- return Messages(self._client)
+ def models(self) -> Models:
+ return Models(self._client)
@cached_property
- def prompt_caching(self) -> PromptCaching:
- return PromptCaching(self._client)
+ def messages(self) -> Messages:
+ return Messages(self._client)
@cached_property
def with_raw_response(self) -> BetaWithRawResponse:
@@ -55,12 +55,12 @@ def with_streaming_response(self) -> BetaWithStreamingResponse:
class AsyncBeta(AsyncAPIResource):
@cached_property
- def messages(self) -> AsyncMessages:
- return AsyncMessages(self._client)
+ def models(self) -> AsyncModels:
+ return AsyncModels(self._client)
@cached_property
- def prompt_caching(self) -> AsyncPromptCaching:
- return AsyncPromptCaching(self._client)
+ def messages(self) -> AsyncMessages:
+ return AsyncMessages(self._client)
@cached_property
def with_raw_response(self) -> AsyncBetaWithRawResponse:
@@ -87,12 +87,12 @@ def __init__(self, beta: Beta) -> None:
self._beta = beta
@cached_property
- def messages(self) -> MessagesWithRawResponse:
- return MessagesWithRawResponse(self._beta.messages)
+ def models(self) -> ModelsWithRawResponse:
+ return ModelsWithRawResponse(self._beta.models)
@cached_property
- def prompt_caching(self) -> PromptCachingWithRawResponse:
- return PromptCachingWithRawResponse(self._beta.prompt_caching)
+ def messages(self) -> MessagesWithRawResponse:
+ return MessagesWithRawResponse(self._beta.messages)
class AsyncBetaWithRawResponse:
@@ -100,12 +100,12 @@ def __init__(self, beta: AsyncBeta) -> None:
self._beta = beta
@cached_property
- def messages(self) -> AsyncMessagesWithRawResponse:
- return AsyncMessagesWithRawResponse(self._beta.messages)
+ def models(self) -> AsyncModelsWithRawResponse:
+ return AsyncModelsWithRawResponse(self._beta.models)
@cached_property
- def prompt_caching(self) -> AsyncPromptCachingWithRawResponse:
- return AsyncPromptCachingWithRawResponse(self._beta.prompt_caching)
+ def messages(self) -> AsyncMessagesWithRawResponse:
+ return AsyncMessagesWithRawResponse(self._beta.messages)
class BetaWithStreamingResponse:
@@ -113,12 +113,12 @@ def __init__(self, beta: Beta) -> None:
self._beta = beta
@cached_property
- def messages(self) -> MessagesWithStreamingResponse:
- return MessagesWithStreamingResponse(self._beta.messages)
+ def models(self) -> ModelsWithStreamingResponse:
+ return ModelsWithStreamingResponse(self._beta.models)
@cached_property
- def prompt_caching(self) -> PromptCachingWithStreamingResponse:
- return PromptCachingWithStreamingResponse(self._beta.prompt_caching)
+ def messages(self) -> MessagesWithStreamingResponse:
+ return MessagesWithStreamingResponse(self._beta.messages)
class AsyncBetaWithStreamingResponse:
@@ -126,9 +126,9 @@ def __init__(self, beta: AsyncBeta) -> None:
self._beta = beta
@cached_property
- def messages(self) -> AsyncMessagesWithStreamingResponse:
- return AsyncMessagesWithStreamingResponse(self._beta.messages)
+ def models(self) -> AsyncModelsWithStreamingResponse:
+ return AsyncModelsWithStreamingResponse(self._beta.models)
@cached_property
- def prompt_caching(self) -> AsyncPromptCachingWithStreamingResponse:
- return AsyncPromptCachingWithStreamingResponse(self._beta.prompt_caching)
+ def messages(self) -> AsyncMessagesWithStreamingResponse:
+ return AsyncMessagesWithStreamingResponse(self._beta.messages)
diff --git a/src/anthropic/resources/beta/messages/batches.py b/src/anthropic/resources/beta/messages/batches.py
index 19a1f86f..4b070a04 100644
--- a/src/anthropic/resources/beta/messages/batches.py
+++ b/src/anthropic/resources/beta/messages/batches.py
@@ -183,7 +183,7 @@ def list(
limit: Number of items to return per page.
- Defaults to `20`. Ranges from `1` to `100`.
+ Defaults to `20`. Ranges from `1` to `1000`.
betas: Optional header to specify the beta version(s) you want to use.
@@ -500,7 +500,7 @@ def list(
limit: Number of items to return per page.
- Defaults to `20`. Ranges from `1` to `100`.
+ Defaults to `20`. Ranges from `1` to `1000`.
betas: Optional header to specify the beta version(s) you want to use.
diff --git a/src/anthropic/resources/beta/models.py b/src/anthropic/resources/beta/models.py
new file mode 100644
index 00000000..fdad3298
--- /dev/null
+++ b/src/anthropic/resources/beta/models.py
@@ -0,0 +1,300 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import httpx
+
+from ... import _legacy_response
+from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ..._utils import maybe_transform
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ...pagination import SyncPage, AsyncPage
+from ...types.beta import model_list_params
+from ..._base_client import AsyncPaginator, make_request_options
+from ...types.beta.beta_model_info import BetaModelInfo
+
+__all__ = ["Models", "AsyncModels"]
+
+
+class Models(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> ModelsWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/anthropics/anthropic-sdk-python#accessing-raw-response-data-eg-headers
+ """
+ return ModelsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> ModelsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/anthropics/anthropic-sdk-python#with_streaming_response
+ """
+ return ModelsWithStreamingResponse(self)
+
+ def retrieve(
+ self,
+ model_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> BetaModelInfo:
+ """
+ Get a specific model.
+
+ The Models API response can be used to determine information about a specific
+ model or resolve a model alias to a model ID.
+
+ Args:
+ model_id: Model identifier or alias.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not model_id:
+ raise ValueError(f"Expected a non-empty value for `model_id` but received {model_id!r}")
+ return self._get(
+ f"/v1/models/{model_id}?beta=true",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=BetaModelInfo,
+ )
+
+ def list(
+ self,
+ *,
+ after_id: str | NotGiven = NOT_GIVEN,
+ before_id: str | NotGiven = NOT_GIVEN,
+ limit: int | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> SyncPage[BetaModelInfo]:
+ """
+ List available models.
+
+ The Models API response can be used to determine which models are available for
+ use in the API. More recently released models are listed first.
+
+ Args:
+ after_id: ID of the object to use as a cursor for pagination. When provided, returns the
+ page of results immediately after this object.
+
+ before_id: ID of the object to use as a cursor for pagination. When provided, returns the
+ page of results immediately before this object.
+
+ limit: Number of items to return per page.
+
+ Defaults to `20`. Ranges from `1` to `1000`.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return self._get_api_list(
+ "/v1/models?beta=true",
+ page=SyncPage[BetaModelInfo],
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "after_id": after_id,
+ "before_id": before_id,
+ "limit": limit,
+ },
+ model_list_params.ModelListParams,
+ ),
+ ),
+ model=BetaModelInfo,
+ )
+
+
+class AsyncModels(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncModelsWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/anthropics/anthropic-sdk-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncModelsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncModelsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/anthropics/anthropic-sdk-python#with_streaming_response
+ """
+ return AsyncModelsWithStreamingResponse(self)
+
+ async def retrieve(
+ self,
+ model_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> BetaModelInfo:
+ """
+ Get a specific model.
+
+ The Models API response can be used to determine information about a specific
+ model or resolve a model alias to a model ID.
+
+ Args:
+ model_id: Model identifier or alias.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not model_id:
+ raise ValueError(f"Expected a non-empty value for `model_id` but received {model_id!r}")
+ return await self._get(
+ f"/v1/models/{model_id}?beta=true",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=BetaModelInfo,
+ )
+
+ def list(
+ self,
+ *,
+ after_id: str | NotGiven = NOT_GIVEN,
+ before_id: str | NotGiven = NOT_GIVEN,
+ limit: int | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AsyncPaginator[BetaModelInfo, AsyncPage[BetaModelInfo]]:
+ """
+ List available models.
+
+ The Models API response can be used to determine which models are available for
+ use in the API. More recently released models are listed first.
+
+ Args:
+ after_id: ID of the object to use as a cursor for pagination. When provided, returns the
+ page of results immediately after this object.
+
+ before_id: ID of the object to use as a cursor for pagination. When provided, returns the
+ page of results immediately before this object.
+
+ limit: Number of items to return per page.
+
+ Defaults to `20`. Ranges from `1` to `1000`.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return self._get_api_list(
+ "/v1/models?beta=true",
+ page=AsyncPage[BetaModelInfo],
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "after_id": after_id,
+ "before_id": before_id,
+ "limit": limit,
+ },
+ model_list_params.ModelListParams,
+ ),
+ ),
+ model=BetaModelInfo,
+ )
+
+
+class ModelsWithRawResponse:
+ def __init__(self, models: Models) -> None:
+ self._models = models
+
+ self.retrieve = _legacy_response.to_raw_response_wrapper(
+ models.retrieve,
+ )
+ self.list = _legacy_response.to_raw_response_wrapper(
+ models.list,
+ )
+
+
+class AsyncModelsWithRawResponse:
+ def __init__(self, models: AsyncModels) -> None:
+ self._models = models
+
+ self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+ models.retrieve,
+ )
+ self.list = _legacy_response.async_to_raw_response_wrapper(
+ models.list,
+ )
+
+
+class ModelsWithStreamingResponse:
+ def __init__(self, models: Models) -> None:
+ self._models = models
+
+ self.retrieve = to_streamed_response_wrapper(
+ models.retrieve,
+ )
+ self.list = to_streamed_response_wrapper(
+ models.list,
+ )
+
+
+class AsyncModelsWithStreamingResponse:
+ def __init__(self, models: AsyncModels) -> None:
+ self._models = models
+
+ self.retrieve = async_to_streamed_response_wrapper(
+ models.retrieve,
+ )
+ self.list = async_to_streamed_response_wrapper(
+ models.list,
+ )
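The new beta `Models` resource above supports cursor pagination via `after_id`, `before_id`, and `limit`. A short sketch of paging through it by hand, assuming the page object exposes a `data` list and that each `BetaModelInfo` carries an `id` usable as the cursor; the page size and alias are arbitrary placeholders.

```python
from anthropic import Anthropic

client = Anthropic()

# First page of beta model listings (SyncPage[BetaModelInfo]).
first_page = client.beta.models.list(limit=5)
for model in first_page.data:
    print(model.id)

# Use the last ID on the page as the cursor for the next request.
if first_page.data:
    next_page = client.beta.models.list(limit=5, after_id=first_page.data[-1].id)

# Resolve an alias or fetch a specific model.
info = client.beta.models.retrieve("claude-3-5-sonnet-latest")  # placeholder alias
```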
diff --git a/src/anthropic/resources/beta/prompt_caching/messages.py b/src/anthropic/resources/beta/prompt_caching/messages.py
deleted file mode 100644
index 1a5dac35..00000000
--- a/src/anthropic/resources/beta/prompt_caching/messages.py
+++ /dev/null
@@ -1,1954 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import List, Union, Iterable
-from functools import partial
-from itertools import chain
-from typing_extensions import Literal, overload
-
-import httpx
-
-from .... import _legacy_response
-from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from ...._utils import (
- is_given,
- required_args,
- maybe_transform,
- strip_not_given,
- async_maybe_transform,
-)
-from ...._compat import cached_property
-from ...._resource import SyncAPIResource, AsyncAPIResource
-from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
-from ...._constants import DEFAULT_TIMEOUT
-from ...._streaming import Stream, AsyncStream
-from ...._base_client import make_request_options
-from ....lib.streaming import PromptCachingBetaMessageStreamManager, AsyncPromptCachingBetaMessageStreamManager
-from ....types.model_param import ModelParam
-from ....types.metadata_param import MetadataParam
-from ....types.tool_choice_param import ToolChoiceParam
-from ....types.beta.prompt_caching import message_create_params
-from ....types.anthropic_beta_param import AnthropicBetaParam
-from ....types.beta.prompt_caching.prompt_caching_beta_message import PromptCachingBetaMessage
-from ....types.beta.prompt_caching.prompt_caching_beta_tool_param import PromptCachingBetaToolParam
-from ....types.beta.prompt_caching.prompt_caching_beta_message_param import PromptCachingBetaMessageParam
-from ....types.beta.prompt_caching.prompt_caching_beta_text_block_param import PromptCachingBetaTextBlockParam
-from ....types.beta.prompt_caching.raw_prompt_caching_beta_message_stream_event import (
- RawPromptCachingBetaMessageStreamEvent,
-)
-
-__all__ = ["Messages", "AsyncMessages"]
-
-
-class Messages(SyncAPIResource):
- @cached_property
- def with_raw_response(self) -> MessagesWithRawResponse:
- """
- This property can be used as a prefix for any HTTP method call to return the
- the raw response object instead of the parsed content.
-
- For more information, see https://www.github.com/anthropics/anthropic-sdk-python#accessing-raw-response-data-eg-headers
- """
- return MessagesWithRawResponse(self)
-
- @cached_property
- def with_streaming_response(self) -> MessagesWithStreamingResponse:
- """
- An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-
- For more information, see https://www.github.com/anthropics/anthropic-sdk-python#with_streaming_response
- """
- return MessagesWithStreamingResponse(self)
-
- @overload
- def create(
- self,
- *,
- max_tokens: int,
- messages: Iterable[PromptCachingBetaMessageParam],
- model: ModelParam,
- metadata: MetadataParam | NotGiven = NOT_GIVEN,
- stop_sequences: List[str] | NotGiven = NOT_GIVEN,
- stream: Literal[False] | NotGiven = NOT_GIVEN,
- system: Union[str, Iterable[PromptCachingBetaTextBlockParam]] | NotGiven = NOT_GIVEN,
- temperature: float | NotGiven = NOT_GIVEN,
- tool_choice: ToolChoiceParam | NotGiven = NOT_GIVEN,
- tools: Iterable[PromptCachingBetaToolParam] | NotGiven = NOT_GIVEN,
- top_k: int | NotGiven = NOT_GIVEN,
- top_p: float | NotGiven = NOT_GIVEN,
- betas: List[AnthropicBetaParam] | NotGiven = NOT_GIVEN,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
- ) -> PromptCachingBetaMessage:
- """
- Send a structured list of input messages with text and/or image content, and the
- model will generate the next message in the conversation.
-
- The Messages API can be used for either single queries or stateless multi-turn
- conversations.
-
- Args:
- max_tokens: The maximum number of tokens to generate before stopping.
-
- Note that our models may stop _before_ reaching this maximum. This parameter
- only specifies the absolute maximum number of tokens to generate.
-
- Different models have different maximum values for this parameter. See
- [models](https://docs.anthropic.com/en/docs/models-overview) for details.
-
- messages: Input messages.
-
- Our models are trained to operate on alternating `user` and `assistant`
- conversational turns. When creating a new `Message`, you specify the prior
- conversational turns with the `messages` parameter, and the model then generates
- the next `Message` in the conversation. Consecutive `user` or `assistant` turns
- in your request will be combined into a single turn.
-
- Each input message must be an object with a `role` and `content`. You can
- specify a single `user`-role message, or you can include multiple `user` and
- `assistant` messages.
-
- If the final message uses the `assistant` role, the response content will
- continue immediately from the content in that message. This can be used to
- constrain part of the model's response.
-
- Example with a single `user` message:
-
- ```json
- [{ "role": "user", "content": "Hello, Claude" }]
- ```
-
- Example with multiple conversational turns:
-
- ```json
- [
- { "role": "user", "content": "Hello there." },
- { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
- { "role": "user", "content": "Can you explain LLMs in plain English?" }
- ]
- ```
-
- Example with a partially-filled response from Claude:
-
- ```json
- [
- {
- "role": "user",
- "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
- },
- { "role": "assistant", "content": "The best answer is (" }
- ]
- ```
-
- Each input message `content` may be either a single `string` or an array of
- content blocks, where each block has a specific `type`. Using a `string` for
- `content` is shorthand for an array of one content block of type `"text"`. The
- following input messages are equivalent:
-
- ```json
- { "role": "user", "content": "Hello, Claude" }
- ```
-
- ```json
- { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
- ```
-
- Starting with Claude 3 models, you can also send image content blocks:
-
- ```json
- {
- "role": "user",
- "content": [
- {
- "type": "image",
- "source": {
- "type": "base64",
- "media_type": "image/jpeg",
- "data": "/9j/4AAQSkZJRg..."
- }
- },
- { "type": "text", "text": "What is in this image?" }
- ]
- }
- ```
-
- We currently support the `base64` source type for images, and the `image/jpeg`,
- `image/png`, `image/gif`, and `image/webp` media types.
-
- See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for
- more input examples.
-
- Note that if you want to include a
- [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use
- the top-level `system` parameter — there is no `"system"` role for input
- messages in the Messages API.
-
- model: The model that will complete your prompt.\n\nSee
- [models](https://docs.anthropic.com/en/docs/models-overview) for additional
- details and options.
-
- metadata: An object describing metadata about the request.
-
- stop_sequences: Custom text sequences that will cause the model to stop generating.
-
- Our models will normally stop when they have naturally completed their turn,
- which will result in a response `stop_reason` of `"end_turn"`.
-
- If you want the model to stop generating when it encounters custom strings of
- text, you can use the `stop_sequences` parameter. If the model encounters one of
- the custom sequences, the response `stop_reason` value will be `"stop_sequence"`
- and the response `stop_sequence` value will contain the matched stop sequence.
-
- stream: Whether to incrementally stream the response using server-sent events.
-
- See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for
- details.
-
- system: System prompt.
-
- A system prompt is a way of providing context and instructions to Claude, such
- as specifying a particular goal or role. See our
- [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts).
-
- temperature: Amount of randomness injected into the response.
-
- Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
- for analytical / multiple choice, and closer to `1.0` for creative and
- generative tasks.
-
- Note that even with `temperature` of `0.0`, the results will not be fully
- deterministic.
-
- tool_choice: How the model should use the provided tools. The model can use a specific tool,
- any available tool, or decide by itself.
-
- tools: Definitions of tools that the model may use.
-
- If you include `tools` in your API request, the model may return `tool_use`
- content blocks that represent the model's use of those tools. You can then run
- those tools using the tool input generated by the model and then optionally
- return results back to the model using `tool_result` content blocks.
-
- Each tool definition includes:
-
- - `name`: Name of the tool.
- - `description`: Optional, but strongly-recommended description of the tool.
- - `input_schema`: [JSON schema](https://json-schema.org/) for the tool `input`
- shape that the model will produce in `tool_use` output content blocks.
-
- For example, if you defined `tools` as:
-
- ```json
- [
- {
- "name": "get_stock_price",
- "description": "Get the current stock price for a given ticker symbol.",
- "input_schema": {
- "type": "object",
- "properties": {
- "ticker": {
- "type": "string",
- "description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
- }
- },
- "required": ["ticker"]
- }
- }
- ]
- ```
-
- And then asked the model "What's the S&P 500 at today?", the model might produce
- `tool_use` content blocks in the response like this:
-
- ```json
- [
- {
- "type": "tool_use",
- "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
- "name": "get_stock_price",
- "input": { "ticker": "^GSPC" }
- }
- ]
- ```
-
- You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an
- input, and return the following back to the model in a subsequent `user`
- message:
-
- ```json
- [
- {
- "type": "tool_result",
- "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
- "content": "259.75 USD"
- }
- ]
- ```
-
- Tools can be used for workflows that include running client-side tools and
- functions, or more generally whenever you want the model to produce a particular
- JSON structure of output.
-
- See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details.
-
- top_k: Only sample from the top K options for each subsequent token.
-
- Used to remove "long tail" low probability responses.
- [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
-
- Recommended for advanced use cases only. You usually only need to use
- `temperature`.
-
- top_p: Use nucleus sampling.
-
- In nucleus sampling, we compute the cumulative distribution over all the options
- for each subsequent token in decreasing probability order and cut it off once it
- reaches a particular probability specified by `top_p`. You should either alter
- `temperature` or `top_p`, but not both.
-
- Recommended for advanced use cases only. You usually only need to use
- `temperature`.
-
- betas: Optional header to specify the beta version(s) you want to use.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- ...
-
- @overload
- def create(
- self,
- *,
- max_tokens: int,
- messages: Iterable[PromptCachingBetaMessageParam],
- model: ModelParam,
- stream: Literal[True],
- metadata: MetadataParam | NotGiven = NOT_GIVEN,
- stop_sequences: List[str] | NotGiven = NOT_GIVEN,
- system: Union[str, Iterable[PromptCachingBetaTextBlockParam]] | NotGiven = NOT_GIVEN,
- temperature: float | NotGiven = NOT_GIVEN,
- tool_choice: ToolChoiceParam | NotGiven = NOT_GIVEN,
- tools: Iterable[PromptCachingBetaToolParam] | NotGiven = NOT_GIVEN,
- top_k: int | NotGiven = NOT_GIVEN,
- top_p: float | NotGiven = NOT_GIVEN,
- betas: List[AnthropicBetaParam] | NotGiven = NOT_GIVEN,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
- ) -> Stream[RawPromptCachingBetaMessageStreamEvent]:
- """
- Send a structured list of input messages with text and/or image content, and the
- model will generate the next message in the conversation.
-
- The Messages API can be used for either single queries or stateless multi-turn
- conversations.
-
- Args:
- max_tokens: The maximum number of tokens to generate before stopping.
-
- Note that our models may stop _before_ reaching this maximum. This parameter
- only specifies the absolute maximum number of tokens to generate.
-
- Different models have different maximum values for this parameter. See
- [models](https://docs.anthropic.com/en/docs/models-overview) for details.
-
- messages: Input messages.
-
- Our models are trained to operate on alternating `user` and `assistant`
- conversational turns. When creating a new `Message`, you specify the prior
- conversational turns with the `messages` parameter, and the model then generates
- the next `Message` in the conversation. Consecutive `user` or `assistant` turns
- in your request will be combined into a single turn.
-
- Each input message must be an object with a `role` and `content`. You can
- specify a single `user`-role message, or you can include multiple `user` and
- `assistant` messages.
-
- If the final message uses the `assistant` role, the response content will
- continue immediately from the content in that message. This can be used to
- constrain part of the model's response.
-
- Example with a single `user` message:
-
- ```json
- [{ "role": "user", "content": "Hello, Claude" }]
- ```
-
- Example with multiple conversational turns:
-
- ```json
- [
- { "role": "user", "content": "Hello there." },
- { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
- { "role": "user", "content": "Can you explain LLMs in plain English?" }
- ]
- ```
-
- Example with a partially-filled response from Claude:
-
- ```json
- [
- {
- "role": "user",
- "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
- },
- { "role": "assistant", "content": "The best answer is (" }
- ]
- ```
-
- Each input message `content` may be either a single `string` or an array of
- content blocks, where each block has a specific `type`. Using a `string` for
- `content` is shorthand for an array of one content block of type `"text"`. The
- following input messages are equivalent:
-
- ```json
- { "role": "user", "content": "Hello, Claude" }
- ```
-
- ```json
- { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
- ```
-
- Starting with Claude 3 models, you can also send image content blocks:
-
- ```json
- {
- "role": "user",
- "content": [
- {
- "type": "image",
- "source": {
- "type": "base64",
- "media_type": "image/jpeg",
- "data": "/9j/4AAQSkZJRg..."
- }
- },
- { "type": "text", "text": "What is in this image?" }
- ]
- }
- ```
-
- We currently support the `base64` source type for images, and the `image/jpeg`,
- `image/png`, `image/gif`, and `image/webp` media types.
-
- See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for
- more input examples.
-
- Note that if you want to include a
- [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use
- the top-level `system` parameter — there is no `"system"` role for input
- messages in the Messages API.
-
- model: The model that will complete your prompt.\n\nSee
- [models](https://docs.anthropic.com/en/docs/models-overview) for additional
- details and options.
-
- stream: Whether to incrementally stream the response using server-sent events.
-
- See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for
- details.
-
- metadata: An object describing metadata about the request.
-
- stop_sequences: Custom text sequences that will cause the model to stop generating.
-
- Our models will normally stop when they have naturally completed their turn,
- which will result in a response `stop_reason` of `"end_turn"`.
-
- If you want the model to stop generating when it encounters custom strings of
- text, you can use the `stop_sequences` parameter. If the model encounters one of
- the custom sequences, the response `stop_reason` value will be `"stop_sequence"`
- and the response `stop_sequence` value will contain the matched stop sequence.
-
- system: System prompt.
-
- A system prompt is a way of providing context and instructions to Claude, such
- as specifying a particular goal or role. See our
- [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts).
-
- temperature: Amount of randomness injected into the response.
-
- Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
- for analytical / multiple choice, and closer to `1.0` for creative and
- generative tasks.
-
- Note that even with `temperature` of `0.0`, the results will not be fully
- deterministic.
-
- tool_choice: How the model should use the provided tools. The model can use a specific tool,
- any available tool, or decide by itself.
-
- tools: Definitions of tools that the model may use.
-
- If you include `tools` in your API request, the model may return `tool_use`
- content blocks that represent the model's use of those tools. You can then run
- those tools using the tool input generated by the model and then optionally
- return results back to the model using `tool_result` content blocks.
-
- Each tool definition includes:
-
- - `name`: Name of the tool.
- - `description`: Optional, but strongly-recommended description of the tool.
- - `input_schema`: [JSON schema](https://json-schema.org/) for the tool `input`
- shape that the model will produce in `tool_use` output content blocks.
-
- For example, if you defined `tools` as:
-
- ```json
- [
- {
- "name": "get_stock_price",
- "description": "Get the current stock price for a given ticker symbol.",
- "input_schema": {
- "type": "object",
- "properties": {
- "ticker": {
- "type": "string",
- "description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
- }
- },
- "required": ["ticker"]
- }
- }
- ]
- ```
-
- And then asked the model "What's the S&P 500 at today?", the model might produce
- `tool_use` content blocks in the response like this:
-
- ```json
- [
- {
- "type": "tool_use",
- "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
- "name": "get_stock_price",
- "input": { "ticker": "^GSPC" }
- }
- ]
- ```
-
- You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an
- input, and return the following back to the model in a subsequent `user`
- message:
-
- ```json
- [
- {
- "type": "tool_result",
- "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
- "content": "259.75 USD"
- }
- ]
- ```
-
- Tools can be used for workflows that include running client-side tools and
- functions, or more generally whenever you want the model to produce a particular
- JSON structure of output.
-
- See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details.
-
- top_k: Only sample from the top K options for each subsequent token.
-
- Used to remove "long tail" low probability responses.
- [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
-
- Recommended for advanced use cases only. You usually only need to use
- `temperature`.
-
- top_p: Use nucleus sampling.
-
- In nucleus sampling, we compute the cumulative distribution over all the options
- for each subsequent token in decreasing probability order and cut it off once it
- reaches a particular probability specified by `top_p`. You should either alter
- `temperature` or `top_p`, but not both.
-
- Recommended for advanced use cases only. You usually only need to use
- `temperature`.
-
- betas: Optional header to specify the beta version(s) you want to use.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- ...
-
- @overload
- def create(
- self,
- *,
- max_tokens: int,
- messages: Iterable[PromptCachingBetaMessageParam],
- model: ModelParam,
- stream: bool,
- metadata: MetadataParam | NotGiven = NOT_GIVEN,
- stop_sequences: List[str] | NotGiven = NOT_GIVEN,
- system: Union[str, Iterable[PromptCachingBetaTextBlockParam]] | NotGiven = NOT_GIVEN,
- temperature: float | NotGiven = NOT_GIVEN,
- tool_choice: ToolChoiceParam | NotGiven = NOT_GIVEN,
- tools: Iterable[PromptCachingBetaToolParam] | NotGiven = NOT_GIVEN,
- top_k: int | NotGiven = NOT_GIVEN,
- top_p: float | NotGiven = NOT_GIVEN,
- betas: List[AnthropicBetaParam] | NotGiven = NOT_GIVEN,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
- ) -> PromptCachingBetaMessage | Stream[RawPromptCachingBetaMessageStreamEvent]:
- """
- Send a structured list of input messages with text and/or image content, and the
- model will generate the next message in the conversation.
-
- The Messages API can be used for either single queries or stateless multi-turn
- conversations.
-
- Args:
- max_tokens: The maximum number of tokens to generate before stopping.
-
- Note that our models may stop _before_ reaching this maximum. This parameter
- only specifies the absolute maximum number of tokens to generate.
-
- Different models have different maximum values for this parameter. See
- [models](https://docs.anthropic.com/en/docs/models-overview) for details.
-
- messages: Input messages.
-
- Our models are trained to operate on alternating `user` and `assistant`
- conversational turns. When creating a new `Message`, you specify the prior
- conversational turns with the `messages` parameter, and the model then generates
- the next `Message` in the conversation. Consecutive `user` or `assistant` turns
- in your request will be combined into a single turn.
-
- Each input message must be an object with a `role` and `content`. You can
- specify a single `user`-role message, or you can include multiple `user` and
- `assistant` messages.
-
- If the final message uses the `assistant` role, the response content will
- continue immediately from the content in that message. This can be used to
- constrain part of the model's response.
-
- Example with a single `user` message:
-
- ```json
- [{ "role": "user", "content": "Hello, Claude" }]
- ```
-
- Example with multiple conversational turns:
-
- ```json
- [
- { "role": "user", "content": "Hello there." },
- { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
- { "role": "user", "content": "Can you explain LLMs in plain English?" }
- ]
- ```
-
- Example with a partially-filled response from Claude:
-
- ```json
- [
- {
- "role": "user",
- "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
- },
- { "role": "assistant", "content": "The best answer is (" }
- ]
- ```
-
- Each input message `content` may be either a single `string` or an array of
- content blocks, where each block has a specific `type`. Using a `string` for
- `content` is shorthand for an array of one content block of type `"text"`. The
- following input messages are equivalent:
-
- ```json
- { "role": "user", "content": "Hello, Claude" }
- ```
-
- ```json
- { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
- ```
-
- Starting with Claude 3 models, you can also send image content blocks:
-
- ```json
- {
- "role": "user",
- "content": [
- {
- "type": "image",
- "source": {
- "type": "base64",
- "media_type": "image/jpeg",
- "data": "/9j/4AAQSkZJRg..."
- }
- },
- { "type": "text", "text": "What is in this image?" }
- ]
- }
- ```
-
- We currently support the `base64` source type for images, and the `image/jpeg`,
- `image/png`, `image/gif`, and `image/webp` media types.
-
- See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for
- more input examples.
-
- Note that if you want to include a
- [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use
- the top-level `system` parameter — there is no `"system"` role for input
- messages in the Messages API.
-
- model: The model that will complete your prompt.\n\nSee
- [models](https://docs.anthropic.com/en/docs/models-overview) for additional
- details and options.
-
- stream: Whether to incrementally stream the response using server-sent events.
-
- See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for
- details.
-
- metadata: An object describing metadata about the request.
-
- stop_sequences: Custom text sequences that will cause the model to stop generating.
-
- Our models will normally stop when they have naturally completed their turn,
- which will result in a response `stop_reason` of `"end_turn"`.
-
- If you want the model to stop generating when it encounters custom strings of
- text, you can use the `stop_sequences` parameter. If the model encounters one of
- the custom sequences, the response `stop_reason` value will be `"stop_sequence"`
- and the response `stop_sequence` value will contain the matched stop sequence.
-
- system: System prompt.
-
- A system prompt is a way of providing context and instructions to Claude, such
- as specifying a particular goal or role. See our
- [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts).
-
- temperature: Amount of randomness injected into the response.
-
- Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
- for analytical / multiple choice, and closer to `1.0` for creative and
- generative tasks.
-
- Note that even with `temperature` of `0.0`, the results will not be fully
- deterministic.
-
- tool_choice: How the model should use the provided tools. The model can use a specific tool,
- any available tool, or decide by itself.
-
- tools: Definitions of tools that the model may use.
-
- If you include `tools` in your API request, the model may return `tool_use`
- content blocks that represent the model's use of those tools. You can then run
- those tools using the tool input generated by the model and then optionally
- return results back to the model using `tool_result` content blocks.
-
- Each tool definition includes:
-
- - `name`: Name of the tool.
- - `description`: Optional, but strongly-recommended description of the tool.
- - `input_schema`: [JSON schema](https://json-schema.org/) for the tool `input`
- shape that the model will produce in `tool_use` output content blocks.
-
- For example, if you defined `tools` as:
-
- ```json
- [
- {
- "name": "get_stock_price",
- "description": "Get the current stock price for a given ticker symbol.",
- "input_schema": {
- "type": "object",
- "properties": {
- "ticker": {
- "type": "string",
- "description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
- }
- },
- "required": ["ticker"]
- }
- }
- ]
- ```
-
- And then asked the model "What's the S&P 500 at today?", the model might produce
- `tool_use` content blocks in the response like this:
-
- ```json
- [
- {
- "type": "tool_use",
- "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
- "name": "get_stock_price",
- "input": { "ticker": "^GSPC" }
- }
- ]
- ```
-
- You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an
- input, and return the following back to the model in a subsequent `user`
- message:
-
- ```json
- [
- {
- "type": "tool_result",
- "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
- "content": "259.75 USD"
- }
- ]
- ```
-
- Tools can be used for workflows that include running client-side tools and
- functions, or more generally whenever you want the model to produce a particular
- JSON structure of output.
-
- See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details.
-
- top_k: Only sample from the top K options for each subsequent token.
-
- Used to remove "long tail" low probability responses.
- [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
-
- Recommended for advanced use cases only. You usually only need to use
- `temperature`.
-
- top_p: Use nucleus sampling.
-
- In nucleus sampling, we compute the cumulative distribution over all the options
- for each subsequent token in decreasing probability order and cut it off once it
- reaches a particular probability specified by `top_p`. You should either alter
- `temperature` or `top_p`, but not both.
-
- Recommended for advanced use cases only. You usually only need to use
- `temperature`.
-
- betas: Optional header to specify the beta version(s) you want to use.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- ...
-
- @required_args(["max_tokens", "messages", "model"], ["max_tokens", "messages", "model", "stream"])
- def create(
- self,
- *,
- max_tokens: int,
- messages: Iterable[PromptCachingBetaMessageParam],
- model: ModelParam,
- metadata: MetadataParam | NotGiven = NOT_GIVEN,
- stop_sequences: List[str] | NotGiven = NOT_GIVEN,
- stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN,
- system: Union[str, Iterable[PromptCachingBetaTextBlockParam]] | NotGiven = NOT_GIVEN,
- temperature: float | NotGiven = NOT_GIVEN,
- tool_choice: ToolChoiceParam | NotGiven = NOT_GIVEN,
- tools: Iterable[PromptCachingBetaToolParam] | NotGiven = NOT_GIVEN,
- top_k: int | NotGiven = NOT_GIVEN,
- top_p: float | NotGiven = NOT_GIVEN,
- betas: List[AnthropicBetaParam] | NotGiven = NOT_GIVEN,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
- ) -> PromptCachingBetaMessage | Stream[RawPromptCachingBetaMessageStreamEvent]:
- if not is_given(timeout) and self._client.timeout == DEFAULT_TIMEOUT:
- timeout = 600
- extra_headers = {
- **strip_not_given(
- {
- "anthropic-beta": ",".join(chain((str(e) for e in betas), ["prompt-caching-2024-07-31"]))
- if is_given(betas)
- else NOT_GIVEN
- }
- ),
- **(extra_headers or {}),
- }
- extra_headers = {"anthropic-beta": "prompt-caching-2024-07-31", **(extra_headers or {})}
- return self._post(
- "/v1/messages?beta=prompt_caching",
- body=maybe_transform(
- {
- "max_tokens": max_tokens,
- "messages": messages,
- "model": model,
- "metadata": metadata,
- "stop_sequences": stop_sequences,
- "stream": stream,
- "system": system,
- "temperature": temperature,
- "tool_choice": tool_choice,
- "tools": tools,
- "top_k": top_k,
- "top_p": top_p,
- },
- message_create_params.MessageCreateParams,
- ),
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=PromptCachingBetaMessage,
- stream=stream or False,
- stream_cls=Stream[RawPromptCachingBetaMessageStreamEvent],
- )
-
- def stream(
- self,
- *,
- max_tokens: int,
- messages: Iterable[PromptCachingBetaMessageParam],
- model: ModelParam,
- metadata: message_create_params.Metadata | NotGiven = NOT_GIVEN,
- stop_sequences: List[str] | NotGiven = NOT_GIVEN,
- system: Union[str, Iterable[PromptCachingBetaTextBlockParam]] | NotGiven = NOT_GIVEN,
- temperature: float | NotGiven = NOT_GIVEN,
- tool_choice: message_create_params.ToolChoice | NotGiven = NOT_GIVEN,
- tools: Iterable[PromptCachingBetaToolParam] | NotGiven = NOT_GIVEN,
- top_k: int | NotGiven = NOT_GIVEN,
- top_p: float | NotGiven = NOT_GIVEN,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
- ) -> PromptCachingBetaMessageStreamManager:
- """Create a Message stream"""
- if not is_given(timeout) and self._client.timeout == DEFAULT_TIMEOUT:
- timeout = 600
-
- extra_headers = {
- "anthropic-beta": "prompt-caching-2024-07-31",
- "X-Stainless-Stream-Helper": "beta.prompt_caching.messages",
- **(extra_headers or {}),
- }
- request = partial(
- self._post,
- "/v1/messages?beta=prompt_caching",
- body=maybe_transform(
- {
- "max_tokens": max_tokens,
- "messages": messages,
- "model": model,
- "metadata": metadata,
- "stop_sequences": stop_sequences,
- "stream": True,
- "system": system,
- "temperature": temperature,
- "tool_choice": tool_choice,
- "tools": tools,
- "top_k": top_k,
- "top_p": top_p,
- },
- message_create_params.MessageCreateParams,
- ),
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=PromptCachingBetaMessage,
- stream=True,
- stream_cls=Stream[RawPromptCachingBetaMessageStreamEvent],
- )
- return PromptCachingBetaMessageStreamManager(request)
-
-
-class AsyncMessages(AsyncAPIResource):
- @cached_property
- def with_raw_response(self) -> AsyncMessagesWithRawResponse:
- """
- This property can be used as a prefix for any HTTP method call to return the
- raw response object instead of the parsed content.
-
- For more information, see https://www.github.com/anthropics/anthropic-sdk-python#accessing-raw-response-data-eg-headers
- """
- return AsyncMessagesWithRawResponse(self)
-
- @cached_property
- def with_streaming_response(self) -> AsyncMessagesWithStreamingResponse:
- """
- An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-
- For more information, see https://www.github.com/anthropics/anthropic-sdk-python#with_streaming_response
- """
- return AsyncMessagesWithStreamingResponse(self)
-
- @overload
- async def create(
- self,
- *,
- max_tokens: int,
- messages: Iterable[PromptCachingBetaMessageParam],
- model: ModelParam,
- metadata: MetadataParam | NotGiven = NOT_GIVEN,
- stop_sequences: List[str] | NotGiven = NOT_GIVEN,
- stream: Literal[False] | NotGiven = NOT_GIVEN,
- system: Union[str, Iterable[PromptCachingBetaTextBlockParam]] | NotGiven = NOT_GIVEN,
- temperature: float | NotGiven = NOT_GIVEN,
- tool_choice: ToolChoiceParam | NotGiven = NOT_GIVEN,
- tools: Iterable[PromptCachingBetaToolParam] | NotGiven = NOT_GIVEN,
- top_k: int | NotGiven = NOT_GIVEN,
- top_p: float | NotGiven = NOT_GIVEN,
- betas: List[AnthropicBetaParam] | NotGiven = NOT_GIVEN,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
- ) -> PromptCachingBetaMessage:
- """
- Send a structured list of input messages with text and/or image content, and the
- model will generate the next message in the conversation.
-
- The Messages API can be used for either single queries or stateless multi-turn
- conversations.
-
- Args:
- max_tokens: The maximum number of tokens to generate before stopping.
-
- Note that our models may stop _before_ reaching this maximum. This parameter
- only specifies the absolute maximum number of tokens to generate.
-
- Different models have different maximum values for this parameter. See
- [models](https://docs.anthropic.com/en/docs/models-overview) for details.
-
- messages: Input messages.
-
- Our models are trained to operate on alternating `user` and `assistant`
- conversational turns. When creating a new `Message`, you specify the prior
- conversational turns with the `messages` parameter, and the model then generates
- the next `Message` in the conversation. Consecutive `user` or `assistant` turns
- in your request will be combined into a single turn.
-
- Each input message must be an object with a `role` and `content`. You can
- specify a single `user`-role message, or you can include multiple `user` and
- `assistant` messages.
-
- If the final message uses the `assistant` role, the response content will
- continue immediately from the content in that message. This can be used to
- constrain part of the model's response.
-
- Example with a single `user` message:
-
- ```json
- [{ "role": "user", "content": "Hello, Claude" }]
- ```
-
- Example with multiple conversational turns:
-
- ```json
- [
- { "role": "user", "content": "Hello there." },
- { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
- { "role": "user", "content": "Can you explain LLMs in plain English?" }
- ]
- ```
-
- Example with a partially-filled response from Claude:
-
- ```json
- [
- {
- "role": "user",
- "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
- },
- { "role": "assistant", "content": "The best answer is (" }
- ]
- ```
-
- Each input message `content` may be either a single `string` or an array of
- content blocks, where each block has a specific `type`. Using a `string` for
- `content` is shorthand for an array of one content block of type `"text"`. The
- following input messages are equivalent:
-
- ```json
- { "role": "user", "content": "Hello, Claude" }
- ```
-
- ```json
- { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
- ```
-
- Starting with Claude 3 models, you can also send image content blocks:
-
- ```json
- {
- "role": "user",
- "content": [
- {
- "type": "image",
- "source": {
- "type": "base64",
- "media_type": "image/jpeg",
- "data": "/9j/4AAQSkZJRg..."
- }
- },
- { "type": "text", "text": "What is in this image?" }
- ]
- }
- ```
-
- We currently support the `base64` source type for images, and the `image/jpeg`,
- `image/png`, `image/gif`, and `image/webp` media types.
-
- See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for
- more input examples.
-
- Note that if you want to include a
- [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use
- the top-level `system` parameter — there is no `"system"` role for input
- messages in the Messages API.
-
- model: The model that will complete your prompt.\n\nSee
- [models](https://docs.anthropic.com/en/docs/models-overview) for additional
- details and options.
-
- metadata: An object describing metadata about the request.
-
- stop_sequences: Custom text sequences that will cause the model to stop generating.
-
- Our models will normally stop when they have naturally completed their turn,
- which will result in a response `stop_reason` of `"end_turn"`.
-
- If you want the model to stop generating when it encounters custom strings of
- text, you can use the `stop_sequences` parameter. If the model encounters one of
- the custom sequences, the response `stop_reason` value will be `"stop_sequence"`
- and the response `stop_sequence` value will contain the matched stop sequence.
-
- stream: Whether to incrementally stream the response using server-sent events.
-
- See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for
- details.
-
- system: System prompt.
-
- A system prompt is a way of providing context and instructions to Claude, such
- as specifying a particular goal or role. See our
- [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts).
-
- temperature: Amount of randomness injected into the response.
-
- Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
- for analytical / multiple choice, and closer to `1.0` for creative and
- generative tasks.
-
- Note that even with `temperature` of `0.0`, the results will not be fully
- deterministic.
-
- tool_choice: How the model should use the provided tools. The model can use a specific tool,
- any available tool, or decide by itself.
-
- tools: Definitions of tools that the model may use.
-
- If you include `tools` in your API request, the model may return `tool_use`
- content blocks that represent the model's use of those tools. You can then run
- those tools using the tool input generated by the model and then optionally
- return results back to the model using `tool_result` content blocks.
-
- Each tool definition includes:
-
- - `name`: Name of the tool.
- - `description`: Optional, but strongly-recommended description of the tool.
- - `input_schema`: [JSON schema](https://json-schema.org/) for the tool `input`
- shape that the model will produce in `tool_use` output content blocks.
-
- For example, if you defined `tools` as:
-
- ```json
- [
- {
- "name": "get_stock_price",
- "description": "Get the current stock price for a given ticker symbol.",
- "input_schema": {
- "type": "object",
- "properties": {
- "ticker": {
- "type": "string",
- "description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
- }
- },
- "required": ["ticker"]
- }
- }
- ]
- ```
-
- And then asked the model "What's the S&P 500 at today?", the model might produce
- `tool_use` content blocks in the response like this:
-
- ```json
- [
- {
- "type": "tool_use",
- "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
- "name": "get_stock_price",
- "input": { "ticker": "^GSPC" }
- }
- ]
- ```
-
- You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an
- input, and return the following back to the model in a subsequent `user`
- message:
-
- ```json
- [
- {
- "type": "tool_result",
- "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
- "content": "259.75 USD"
- }
- ]
- ```
-
- Tools can be used for workflows that include running client-side tools and
- functions, or more generally whenever you want the model to produce a particular
- JSON structure of output.
-
- See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details.
-
- top_k: Only sample from the top K options for each subsequent token.
-
- Used to remove "long tail" low probability responses.
- [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
-
- Recommended for advanced use cases only. You usually only need to use
- `temperature`.
-
- top_p: Use nucleus sampling.
-
- In nucleus sampling, we compute the cumulative distribution over all the options
- for each subsequent token in decreasing probability order and cut it off once it
- reaches a particular probability specified by `top_p`. You should either alter
- `temperature` or `top_p`, but not both.
-
- Recommended for advanced use cases only. You usually only need to use
- `temperature`.
-
- betas: Optional header to specify the beta version(s) you want to use.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- ...
-
- @overload
- async def create(
- self,
- *,
- max_tokens: int,
- messages: Iterable[PromptCachingBetaMessageParam],
- model: ModelParam,
- stream: Literal[True],
- metadata: MetadataParam | NotGiven = NOT_GIVEN,
- stop_sequences: List[str] | NotGiven = NOT_GIVEN,
- system: Union[str, Iterable[PromptCachingBetaTextBlockParam]] | NotGiven = NOT_GIVEN,
- temperature: float | NotGiven = NOT_GIVEN,
- tool_choice: ToolChoiceParam | NotGiven = NOT_GIVEN,
- tools: Iterable[PromptCachingBetaToolParam] | NotGiven = NOT_GIVEN,
- top_k: int | NotGiven = NOT_GIVEN,
- top_p: float | NotGiven = NOT_GIVEN,
- betas: List[AnthropicBetaParam] | NotGiven = NOT_GIVEN,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
- ) -> AsyncStream[RawPromptCachingBetaMessageStreamEvent]:
- """
- Send a structured list of input messages with text and/or image content, and the
- model will generate the next message in the conversation.
-
- The Messages API can be used for either single queries or stateless multi-turn
- conversations.
-
- Args:
- max_tokens: The maximum number of tokens to generate before stopping.
-
- Note that our models may stop _before_ reaching this maximum. This parameter
- only specifies the absolute maximum number of tokens to generate.
-
- Different models have different maximum values for this parameter. See
- [models](https://docs.anthropic.com/en/docs/models-overview) for details.
-
- messages: Input messages.
-
- Our models are trained to operate on alternating `user` and `assistant`
- conversational turns. When creating a new `Message`, you specify the prior
- conversational turns with the `messages` parameter, and the model then generates
- the next `Message` in the conversation. Consecutive `user` or `assistant` turns
- in your request will be combined into a single turn.
-
- Each input message must be an object with a `role` and `content`. You can
- specify a single `user`-role message, or you can include multiple `user` and
- `assistant` messages.
-
- If the final message uses the `assistant` role, the response content will
- continue immediately from the content in that message. This can be used to
- constrain part of the model's response.
-
- Example with a single `user` message:
-
- ```json
- [{ "role": "user", "content": "Hello, Claude" }]
- ```
-
- Example with multiple conversational turns:
-
- ```json
- [
- { "role": "user", "content": "Hello there." },
- { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
- { "role": "user", "content": "Can you explain LLMs in plain English?" }
- ]
- ```
-
- Example with a partially-filled response from Claude:
-
- ```json
- [
- {
- "role": "user",
- "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
- },
- { "role": "assistant", "content": "The best answer is (" }
- ]
- ```
-
- Each input message `content` may be either a single `string` or an array of
- content blocks, where each block has a specific `type`. Using a `string` for
- `content` is shorthand for an array of one content block of type `"text"`. The
- following input messages are equivalent:
-
- ```json
- { "role": "user", "content": "Hello, Claude" }
- ```
-
- ```json
- { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
- ```
-
- Starting with Claude 3 models, you can also send image content blocks:
-
- ```json
- {
- "role": "user",
- "content": [
- {
- "type": "image",
- "source": {
- "type": "base64",
- "media_type": "image/jpeg",
- "data": "/9j/4AAQSkZJRg..."
- }
- },
- { "type": "text", "text": "What is in this image?" }
- ]
- }
- ```
-
- We currently support the `base64` source type for images, and the `image/jpeg`,
- `image/png`, `image/gif`, and `image/webp` media types.
-
- See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for
- more input examples.
-
- Note that if you want to include a
- [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use
- the top-level `system` parameter — there is no `"system"` role for input
- messages in the Messages API.
-
- model: The model that will complete your prompt.\n\nSee
- [models](https://docs.anthropic.com/en/docs/models-overview) for additional
- details and options.
-
- stream: Whether to incrementally stream the response using server-sent events.
-
- See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for
- details.
-
- metadata: An object describing metadata about the request.
-
- stop_sequences: Custom text sequences that will cause the model to stop generating.
-
- Our models will normally stop when they have naturally completed their turn,
- which will result in a response `stop_reason` of `"end_turn"`.
-
- If you want the model to stop generating when it encounters custom strings of
- text, you can use the `stop_sequences` parameter. If the model encounters one of
- the custom sequences, the response `stop_reason` value will be `"stop_sequence"`
- and the response `stop_sequence` value will contain the matched stop sequence.
-
- system: System prompt.
-
- A system prompt is a way of providing context and instructions to Claude, such
- as specifying a particular goal or role. See our
- [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts).
-
- temperature: Amount of randomness injected into the response.
-
- Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
- for analytical / multiple choice, and closer to `1.0` for creative and
- generative tasks.
-
- Note that even with `temperature` of `0.0`, the results will not be fully
- deterministic.
-
- tool_choice: How the model should use the provided tools. The model can use a specific tool,
- any available tool, or decide by itself.
-
- tools: Definitions of tools that the model may use.
-
- If you include `tools` in your API request, the model may return `tool_use`
- content blocks that represent the model's use of those tools. You can then run
- those tools using the tool input generated by the model and then optionally
- return results back to the model using `tool_result` content blocks.
-
- Each tool definition includes:
-
- - `name`: Name of the tool.
- - `description`: Optional, but strongly-recommended description of the tool.
- - `input_schema`: [JSON schema](https://json-schema.org/) for the tool `input`
- shape that the model will produce in `tool_use` output content blocks.
-
- For example, if you defined `tools` as:
-
- ```json
- [
- {
- "name": "get_stock_price",
- "description": "Get the current stock price for a given ticker symbol.",
- "input_schema": {
- "type": "object",
- "properties": {
- "ticker": {
- "type": "string",
- "description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
- }
- },
- "required": ["ticker"]
- }
- }
- ]
- ```
-
- And then asked the model "What's the S&P 500 at today?", the model might produce
- `tool_use` content blocks in the response like this:
-
- ```json
- [
- {
- "type": "tool_use",
- "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
- "name": "get_stock_price",
- "input": { "ticker": "^GSPC" }
- }
- ]
- ```
-
- You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an
- input, and return the following back to the model in a subsequent `user`
- message:
-
- ```json
- [
- {
- "type": "tool_result",
- "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
- "content": "259.75 USD"
- }
- ]
- ```
-
- Tools can be used for workflows that include running client-side tools and
- functions, or more generally whenever you want the model to produce a particular
- JSON structure of output.
-
- See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details.
-
- top_k: Only sample from the top K options for each subsequent token.
-
- Used to remove "long tail" low probability responses.
- [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
-
- Recommended for advanced use cases only. You usually only need to use
- `temperature`.
-
- top_p: Use nucleus sampling.
-
- In nucleus sampling, we compute the cumulative distribution over all the options
- for each subsequent token in decreasing probability order and cut it off once it
- reaches a particular probability specified by `top_p`. You should either alter
- `temperature` or `top_p`, but not both.
-
- Recommended for advanced use cases only. You usually only need to use
- `temperature`.
-
- betas: Optional header to specify the beta version(s) you want to use.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- ...
-
- @overload
- async def create(
- self,
- *,
- max_tokens: int,
- messages: Iterable[PromptCachingBetaMessageParam],
- model: ModelParam,
- stream: bool,
- metadata: MetadataParam | NotGiven = NOT_GIVEN,
- stop_sequences: List[str] | NotGiven = NOT_GIVEN,
- system: Union[str, Iterable[PromptCachingBetaTextBlockParam]] | NotGiven = NOT_GIVEN,
- temperature: float | NotGiven = NOT_GIVEN,
- tool_choice: ToolChoiceParam | NotGiven = NOT_GIVEN,
- tools: Iterable[PromptCachingBetaToolParam] | NotGiven = NOT_GIVEN,
- top_k: int | NotGiven = NOT_GIVEN,
- top_p: float | NotGiven = NOT_GIVEN,
- betas: List[AnthropicBetaParam] | NotGiven = NOT_GIVEN,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
- ) -> PromptCachingBetaMessage | AsyncStream[RawPromptCachingBetaMessageStreamEvent]:
- """
- Send a structured list of input messages with text and/or image content, and the
- model will generate the next message in the conversation.
-
- The Messages API can be used for either single queries or stateless multi-turn
- conversations.
-
- Args:
- max_tokens: The maximum number of tokens to generate before stopping.
-
- Note that our models may stop _before_ reaching this maximum. This parameter
- only specifies the absolute maximum number of tokens to generate.
-
- Different models have different maximum values for this parameter. See
- [models](https://docs.anthropic.com/en/docs/models-overview) for details.
-
- messages: Input messages.
-
- Our models are trained to operate on alternating `user` and `assistant`
- conversational turns. When creating a new `Message`, you specify the prior
- conversational turns with the `messages` parameter, and the model then generates
- the next `Message` in the conversation. Consecutive `user` or `assistant` turns
- in your request will be combined into a single turn.
-
- Each input message must be an object with a `role` and `content`. You can
- specify a single `user`-role message, or you can include multiple `user` and
- `assistant` messages.
-
- If the final message uses the `assistant` role, the response content will
- continue immediately from the content in that message. This can be used to
- constrain part of the model's response.
-
- Example with a single `user` message:
-
- ```json
- [{ "role": "user", "content": "Hello, Claude" }]
- ```
-
- Example with multiple conversational turns:
-
- ```json
- [
- { "role": "user", "content": "Hello there." },
- { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
- { "role": "user", "content": "Can you explain LLMs in plain English?" }
- ]
- ```
-
- Example with a partially-filled response from Claude:
-
- ```json
- [
- {
- "role": "user",
- "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
- },
- { "role": "assistant", "content": "The best answer is (" }
- ]
- ```
-
- Each input message `content` may be either a single `string` or an array of
- content blocks, where each block has a specific `type`. Using a `string` for
- `content` is shorthand for an array of one content block of type `"text"`. The
- following input messages are equivalent:
-
- ```json
- { "role": "user", "content": "Hello, Claude" }
- ```
-
- ```json
- { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
- ```
-
- Starting with Claude 3 models, you can also send image content blocks:
-
- ```json
- {
- "role": "user",
- "content": [
- {
- "type": "image",
- "source": {
- "type": "base64",
- "media_type": "image/jpeg",
- "data": "/9j/4AAQSkZJRg..."
- }
- },
- { "type": "text", "text": "What is in this image?" }
- ]
- }
- ```
-
- We currently support the `base64` source type for images, and the `image/jpeg`,
- `image/png`, `image/gif`, and `image/webp` media types.
-
- See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for
- more input examples.
-
- Note that if you want to include a
- [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use
- the top-level `system` parameter — there is no `"system"` role for input
- messages in the Messages API.
-
- model: The model that will complete your prompt.\n\nSee
- [models](https://docs.anthropic.com/en/docs/models-overview) for additional
- details and options.
-
- stream: Whether to incrementally stream the response using server-sent events.
-
- See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for
- details.
-
- metadata: An object describing metadata about the request.
-
- stop_sequences: Custom text sequences that will cause the model to stop generating.
-
- Our models will normally stop when they have naturally completed their turn,
- which will result in a response `stop_reason` of `"end_turn"`.
-
- If you want the model to stop generating when it encounters custom strings of
- text, you can use the `stop_sequences` parameter. If the model encounters one of
- the custom sequences, the response `stop_reason` value will be `"stop_sequence"`
- and the response `stop_sequence` value will contain the matched stop sequence.
-
- system: System prompt.
-
- A system prompt is a way of providing context and instructions to Claude, such
- as specifying a particular goal or role. See our
- [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts).
-
- temperature: Amount of randomness injected into the response.
-
- Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
- for analytical / multiple choice, and closer to `1.0` for creative and
- generative tasks.
-
- Note that even with `temperature` of `0.0`, the results will not be fully
- deterministic.
-
- tool_choice: How the model should use the provided tools. The model can use a specific tool,
- any available tool, or decide by itself.
-
- tools: Definitions of tools that the model may use.
-
- If you include `tools` in your API request, the model may return `tool_use`
- content blocks that represent the model's use of those tools. You can then run
- those tools using the tool input generated by the model and then optionally
- return results back to the model using `tool_result` content blocks.
-
- Each tool definition includes:
-
- - `name`: Name of the tool.
- - `description`: Optional, but strongly-recommended description of the tool.
- - `input_schema`: [JSON schema](https://json-schema.org/) for the tool `input`
- shape that the model will produce in `tool_use` output content blocks.
-
- For example, if you defined `tools` as:
-
- ```json
- [
- {
- "name": "get_stock_price",
- "description": "Get the current stock price for a given ticker symbol.",
- "input_schema": {
- "type": "object",
- "properties": {
- "ticker": {
- "type": "string",
- "description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
- }
- },
- "required": ["ticker"]
- }
- }
- ]
- ```
-
- And then asked the model "What's the S&P 500 at today?", the model might produce
- `tool_use` content blocks in the response like this:
-
- ```json
- [
- {
- "type": "tool_use",
- "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
- "name": "get_stock_price",
- "input": { "ticker": "^GSPC" }
- }
- ]
- ```
-
- You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an
- input, and return the following back to the model in a subsequent `user`
- message:
-
- ```json
- [
- {
- "type": "tool_result",
- "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
- "content": "259.75 USD"
- }
- ]
- ```
-
- Tools can be used for workflows that include running client-side tools and
- functions, or more generally whenever you want the model to produce a particular
- JSON structure of output.
-
- See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details.
-
- top_k: Only sample from the top K options for each subsequent token.
-
- Used to remove "long tail" low probability responses.
- [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
-
- Recommended for advanced use cases only. You usually only need to use
- `temperature`.
-
- top_p: Use nucleus sampling.
-
- In nucleus sampling, we compute the cumulative distribution over all the options
- for each subsequent token in decreasing probability order and cut it off once it
- reaches a particular probability specified by `top_p`. You should either alter
- `temperature` or `top_p`, but not both.
-
- Recommended for advanced use cases only. You usually only need to use
- `temperature`.
-
- betas: Optional header to specify the beta version(s) you want to use.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- ...
-
- @required_args(["max_tokens", "messages", "model"], ["max_tokens", "messages", "model", "stream"])
- async def create(
- self,
- *,
- max_tokens: int,
- messages: Iterable[PromptCachingBetaMessageParam],
- model: ModelParam,
- metadata: MetadataParam | NotGiven = NOT_GIVEN,
- stop_sequences: List[str] | NotGiven = NOT_GIVEN,
- stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN,
- system: Union[str, Iterable[PromptCachingBetaTextBlockParam]] | NotGiven = NOT_GIVEN,
- temperature: float | NotGiven = NOT_GIVEN,
- tool_choice: ToolChoiceParam | NotGiven = NOT_GIVEN,
- tools: Iterable[PromptCachingBetaToolParam] | NotGiven = NOT_GIVEN,
- top_k: int | NotGiven = NOT_GIVEN,
- top_p: float | NotGiven = NOT_GIVEN,
- betas: List[AnthropicBetaParam] | NotGiven = NOT_GIVEN,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
- ) -> PromptCachingBetaMessage | AsyncStream[RawPromptCachingBetaMessageStreamEvent]:
- if not is_given(timeout) and self._client.timeout == DEFAULT_TIMEOUT:
- timeout = 600
- extra_headers = {
- **strip_not_given(
- {
- "anthropic-beta": ",".join(chain((str(e) for e in betas), ["prompt-caching-2024-07-31"]))
- if is_given(betas)
- else NOT_GIVEN
- }
- ),
- **(extra_headers or {}),
- }
- extra_headers = {"anthropic-beta": "prompt-caching-2024-07-31", **(extra_headers or {})}
- return await self._post(
- "/v1/messages?beta=prompt_caching",
- body=await async_maybe_transform(
- {
- "max_tokens": max_tokens,
- "messages": messages,
- "model": model,
- "metadata": metadata,
- "stop_sequences": stop_sequences,
- "stream": stream,
- "system": system,
- "temperature": temperature,
- "tool_choice": tool_choice,
- "tools": tools,
- "top_k": top_k,
- "top_p": top_p,
- },
- message_create_params.MessageCreateParams,
- ),
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=PromptCachingBetaMessage,
- stream=stream or False,
- stream_cls=AsyncStream[RawPromptCachingBetaMessageStreamEvent],
- )
-
- def stream(
- self,
- *,
- max_tokens: int,
- messages: Iterable[PromptCachingBetaMessageParam],
- model: ModelParam,
- metadata: message_create_params.Metadata | NotGiven = NOT_GIVEN,
- stop_sequences: List[str] | NotGiven = NOT_GIVEN,
- system: Union[str, Iterable[PromptCachingBetaTextBlockParam]] | NotGiven = NOT_GIVEN,
- temperature: float | NotGiven = NOT_GIVEN,
- tool_choice: message_create_params.ToolChoice | NotGiven = NOT_GIVEN,
- tools: Iterable[PromptCachingBetaToolParam] | NotGiven = NOT_GIVEN,
- top_k: int | NotGiven = NOT_GIVEN,
- top_p: float | NotGiven = NOT_GIVEN,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
- ) -> AsyncPromptCachingBetaMessageStreamManager:
- """Create a Message stream"""
- if not is_given(timeout) and self._client.timeout == DEFAULT_TIMEOUT:
- timeout = 600
-
- extra_headers = {
- "anthropic-beta": "prompt-caching-2024-07-31",
- "X-Stainless-Stream-Helper": "beta.prompt_caching.messages",
- **(extra_headers or {}),
- }
- request = self._post(
- "/v1/messages?beta=prompt_caching",
- body=maybe_transform(
- {
- "max_tokens": max_tokens,
- "messages": messages,
- "model": model,
- "metadata": metadata,
- "stop_sequences": stop_sequences,
- "stream": True,
- "system": system,
- "temperature": temperature,
- "tool_choice": tool_choice,
- "tools": tools,
- "top_k": top_k,
- "top_p": top_p,
- },
- message_create_params.MessageCreateParams,
- ),
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=PromptCachingBetaMessage,
- stream=True,
- stream_cls=AsyncStream[RawPromptCachingBetaMessageStreamEvent],
- )
- return AsyncPromptCachingBetaMessageStreamManager(request)
-
-
-class MessagesWithRawResponse:
- def __init__(self, messages: Messages) -> None:
- self._messages = messages
-
- self.create = _legacy_response.to_raw_response_wrapper(
- messages.create,
- )
-
-
-class AsyncMessagesWithRawResponse:
- def __init__(self, messages: AsyncMessages) -> None:
- self._messages = messages
-
- self.create = _legacy_response.async_to_raw_response_wrapper(
- messages.create,
- )
-
-
-class MessagesWithStreamingResponse:
- def __init__(self, messages: Messages) -> None:
- self._messages = messages
-
- self.create = to_streamed_response_wrapper(
- messages.create,
- )
-
-
-class AsyncMessagesWithStreamingResponse:
- def __init__(self, messages: AsyncMessages) -> None:
- self._messages = messages
-
- self.create = async_to_streamed_response_wrapper(
- messages.create,
- )
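
With the `beta.prompt_caching.messages` resource removed above, prompt caching is reached through the regular `client.messages.create` call by attaching `cache_control` to the blocks whose prefix should be cached, so the dedicated beta namespace is no longer needed. A minimal migration sketch, assuming the GA Messages API accepts `cache_control` on system and content blocks (model id and prompt text are placeholders):

```python
from anthropic import Anthropic

client = Anthropic()

# Previously: client.beta.prompt_caching.messages.create(...)
# The same request now goes through the standard Messages resource, with the
# cache breakpoint marked directly on the block to be cached.
message = client.messages.create(
    model="claude-3-5-sonnet-latest",  # placeholder model id
    max_tokens=1024,
    system=[
        {
            "type": "text",
            "text": "You are a helpful assistant.",  # large, reusable context goes here
            "cache_control": {"type": "ephemeral"},
        }
    ],
    messages=[{"role": "user", "content": "Hello, Claude"}],
)
print(message.content)
```
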
diff --git a/src/anthropic/resources/beta/prompt_caching/prompt_caching.py b/src/anthropic/resources/beta/prompt_caching/prompt_caching.py
deleted file mode 100644
index 0154a0d3..00000000
--- a/src/anthropic/resources/beta/prompt_caching/prompt_caching.py
+++ /dev/null
@@ -1,102 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from .messages import (
- Messages,
- AsyncMessages,
- MessagesWithRawResponse,
- AsyncMessagesWithRawResponse,
- MessagesWithStreamingResponse,
- AsyncMessagesWithStreamingResponse,
-)
-from ...._compat import cached_property
-from ...._resource import SyncAPIResource, AsyncAPIResource
-
-__all__ = ["PromptCaching", "AsyncPromptCaching"]
-
-
-class PromptCaching(SyncAPIResource):
- @cached_property
- def messages(self) -> Messages:
- return Messages(self._client)
-
- @cached_property
- def with_raw_response(self) -> PromptCachingWithRawResponse:
- """
- This property can be used as a prefix for any HTTP method call to return the
- raw response object instead of the parsed content.
-
- For more information, see https://www.github.com/anthropics/anthropic-sdk-python#accessing-raw-response-data-eg-headers
- """
- return PromptCachingWithRawResponse(self)
-
- @cached_property
- def with_streaming_response(self) -> PromptCachingWithStreamingResponse:
- """
- An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-
- For more information, see https://www.github.com/anthropics/anthropic-sdk-python#with_streaming_response
- """
- return PromptCachingWithStreamingResponse(self)
-
-
-class AsyncPromptCaching(AsyncAPIResource):
- @cached_property
- def messages(self) -> AsyncMessages:
- return AsyncMessages(self._client)
-
- @cached_property
- def with_raw_response(self) -> AsyncPromptCachingWithRawResponse:
- """
- This property can be used as a prefix for any HTTP method call to return the
- raw response object instead of the parsed content.
-
- For more information, see https://www.github.com/anthropics/anthropic-sdk-python#accessing-raw-response-data-eg-headers
- """
- return AsyncPromptCachingWithRawResponse(self)
-
- @cached_property
- def with_streaming_response(self) -> AsyncPromptCachingWithStreamingResponse:
- """
- An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-
- For more information, see https://www.github.com/anthropics/anthropic-sdk-python#with_streaming_response
- """
- return AsyncPromptCachingWithStreamingResponse(self)
-
-
-class PromptCachingWithRawResponse:
- def __init__(self, prompt_caching: PromptCaching) -> None:
- self._prompt_caching = prompt_caching
-
- @cached_property
- def messages(self) -> MessagesWithRawResponse:
- return MessagesWithRawResponse(self._prompt_caching.messages)
-
-
-class AsyncPromptCachingWithRawResponse:
- def __init__(self, prompt_caching: AsyncPromptCaching) -> None:
- self._prompt_caching = prompt_caching
-
- @cached_property
- def messages(self) -> AsyncMessagesWithRawResponse:
- return AsyncMessagesWithRawResponse(self._prompt_caching.messages)
-
-
-class PromptCachingWithStreamingResponse:
- def __init__(self, prompt_caching: PromptCaching) -> None:
- self._prompt_caching = prompt_caching
-
- @cached_property
- def messages(self) -> MessagesWithStreamingResponse:
- return MessagesWithStreamingResponse(self._prompt_caching.messages)
-
-
-class AsyncPromptCachingWithStreamingResponse:
- def __init__(self, prompt_caching: AsyncPromptCaching) -> None:
- self._prompt_caching = prompt_caching
-
- @cached_property
- def messages(self) -> AsyncMessagesWithStreamingResponse:
- return AsyncMessagesWithStreamingResponse(self._prompt_caching.messages)
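
The `with_raw_response` and `with_streaming_response` accessors described in the docstrings above remain available on the surviving resources. A short sketch of typical usage, assuming the wrapper objects expose `headers` and `parse()` as the linked README describes (model id is a placeholder):

```python
from anthropic import Anthropic

client = Anthropic()

# .with_raw_response returns a wrapper around the HTTP response; .parse() yields
# the typed Message that a plain .create() call would have returned.
raw = client.messages.with_raw_response.create(
    model="claude-3-5-sonnet-latest",  # placeholder model id
    max_tokens=64,
    messages=[{"role": "user", "content": "Hello, Claude"}],
)
print(raw.headers.get("request-id"))
message = raw.parse()

# .with_streaming_response defers reading the response body until it is consumed.
with client.messages.with_streaming_response.create(
    model="claude-3-5-sonnet-latest",
    max_tokens=64,
    messages=[{"role": "user", "content": "Hello, Claude"}],
) as response:
    print(response.headers.get("content-type"))
```
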
diff --git a/src/anthropic/resources/beta/prompt_caching/__init__.py b/src/anthropic/resources/messages/__init__.py
similarity index 52%
rename from src/anthropic/resources/beta/prompt_caching/__init__.py
rename to src/anthropic/resources/messages/__init__.py
index ccf0b0a8..6e7cf9d9 100644
--- a/src/anthropic/resources/beta/prompt_caching/__init__.py
+++ b/src/anthropic/resources/messages/__init__.py
@@ -1,6 +1,15 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+from .batches import (
+ Batches,
+ AsyncBatches,
+ BatchesWithRawResponse,
+ AsyncBatchesWithRawResponse,
+ BatchesWithStreamingResponse,
+ AsyncBatchesWithStreamingResponse,
+)
from .messages import (
+ DEPRECATED_MODELS,
Messages,
AsyncMessages,
MessagesWithRawResponse,
@@ -8,26 +17,19 @@
MessagesWithStreamingResponse,
AsyncMessagesWithStreamingResponse,
)
-from .prompt_caching import (
- PromptCaching,
- AsyncPromptCaching,
- PromptCachingWithRawResponse,
- AsyncPromptCachingWithRawResponse,
- PromptCachingWithStreamingResponse,
- AsyncPromptCachingWithStreamingResponse,
-)
__all__ = [
+ "Batches",
+ "AsyncBatches",
+ "BatchesWithRawResponse",
+ "AsyncBatchesWithRawResponse",
+ "BatchesWithStreamingResponse",
+ "AsyncBatchesWithStreamingResponse",
"Messages",
"AsyncMessages",
"MessagesWithRawResponse",
"AsyncMessagesWithRawResponse",
"MessagesWithStreamingResponse",
"AsyncMessagesWithStreamingResponse",
- "PromptCaching",
- "AsyncPromptCaching",
- "PromptCachingWithRawResponse",
- "AsyncPromptCachingWithRawResponse",
- "PromptCachingWithStreamingResponse",
- "AsyncPromptCachingWithStreamingResponse",
+ "DEPRECATED_MODELS",
]
diff --git a/src/anthropic/resources/messages/batches.py b/src/anthropic/resources/messages/batches.py
new file mode 100644
index 00000000..7124e954
--- /dev/null
+++ b/src/anthropic/resources/messages/batches.py
@@ -0,0 +1,618 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Iterable
+
+import httpx
+
+from ... import _legacy_response
+from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ..._utils import (
+ maybe_transform,
+ async_maybe_transform,
+)
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import (
+ BinaryAPIResponse,
+ AsyncBinaryAPIResponse,
+ StreamedBinaryAPIResponse,
+ AsyncStreamedBinaryAPIResponse,
+ to_streamed_response_wrapper,
+ to_custom_raw_response_wrapper,
+ async_to_streamed_response_wrapper,
+ to_custom_streamed_response_wrapper,
+ async_to_custom_raw_response_wrapper,
+ async_to_custom_streamed_response_wrapper,
+)
+from ...pagination import SyncPage, AsyncPage
+from ..._exceptions import AnthropicError
+from ..._base_client import AsyncPaginator, make_request_options
+from ...types.messages import MessageBatchIndividualResponse, batch_list_params, batch_create_params
+from ..._decoders.jsonl import JSONLDecoder, AsyncJSONLDecoder
+from ...types.messages.message_batch import MessageBatch
+
+__all__ = ["Batches", "AsyncBatches"]
+
+
+class Batches(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> BatchesWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return the
+ raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/anthropics/anthropic-sdk-python#accessing-raw-response-data-eg-headers
+ """
+ return BatchesWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> BatchesWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/anthropics/anthropic-sdk-python#with_streaming_response
+ """
+ return BatchesWithStreamingResponse(self)
+
+ def create(
+ self,
+ *,
+ requests: Iterable[batch_create_params.Request],
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> MessageBatch:
+ """
+ Send a batch of Message creation requests.
+
+ The Message Batches API can be used to process multiple Messages API requests at
+ once. Once a Message Batch is created, it begins processing immediately. Batches
+ can take up to 24 hours to complete.
+
+ Args:
+ requests: List of requests for prompt completion. Each is an individual request to create
+ a Message.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return self._post(
+ "/v1/messages/batches",
+ body=maybe_transform({"requests": requests}, batch_create_params.BatchCreateParams),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=MessageBatch,
+ )
+
+ def retrieve(
+ self,
+ message_batch_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> MessageBatch:
+ """This endpoint is idempotent and can be used to poll for Message Batch
+ completion.
+
+ To access the results of a Message Batch, make a request to the
+ `results_url` field in the response.
+
+ Args:
+ message_batch_id: ID of the Message Batch.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not message_batch_id:
+ raise ValueError(f"Expected a non-empty value for `message_batch_id` but received {message_batch_id!r}")
+ return self._get(
+ f"/v1/messages/batches/{message_batch_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=MessageBatch,
+ )
+
+ def list(
+ self,
+ *,
+ after_id: str | NotGiven = NOT_GIVEN,
+ before_id: str | NotGiven = NOT_GIVEN,
+ limit: int | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> SyncPage[MessageBatch]:
+ """List all Message Batches within a Workspace.
+
+ Most recently created batches are
+ returned first.
+
+ Args:
+ after_id: ID of the object to use as a cursor for pagination. When provided, returns the
+ page of results immediately after this object.
+
+ before_id: ID of the object to use as a cursor for pagination. When provided, returns the
+ page of results immediately before this object.
+
+ limit: Number of items to return per page.
+
+ Defaults to `20`. Ranges from `1` to `1000`.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return self._get_api_list(
+ "/v1/messages/batches",
+ page=SyncPage[MessageBatch],
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "after_id": after_id,
+ "before_id": before_id,
+ "limit": limit,
+ },
+ batch_list_params.BatchListParams,
+ ),
+ ),
+ model=MessageBatch,
+ )
+
+ def cancel(
+ self,
+ message_batch_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> MessageBatch:
+ """Batches may be canceled any time before processing ends.
+
+ Once cancellation is
+ initiated, the batch enters a `canceling` state, at which time the system may
+ complete any in-progress, non-interruptible requests before finalizing
+ cancellation.
+
+ The number of canceled requests is specified in `request_counts`. To determine
+ which requests were canceled, check the individual results within the batch.
+ Note that cancellation may not result in any canceled requests if they were
+ non-interruptible.
+
+ Args:
+ message_batch_id: ID of the Message Batch.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not message_batch_id:
+ raise ValueError(f"Expected a non-empty value for `message_batch_id` but received {message_batch_id!r}")
+ return self._post(
+ f"/v1/messages/batches/{message_batch_id}/cancel",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=MessageBatch,
+ )
+
+ def results(
+ self,
+ message_batch_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> JSONLDecoder[MessageBatchIndividualResponse]:
+ """
+ Streams the results of a Message Batch as a `.jsonl` file.
+
+ Each line in the file is a JSON object containing the result of a single request
+ in the Message Batch. Results are not guaranteed to be in the same order as
+ requests. Use the `custom_id` field to match results to requests.
+
+ Args:
+ message_batch_id: ID of the Message Batch.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not message_batch_id:
+ raise ValueError(f"Expected a non-empty value for `message_batch_id` but received {message_batch_id!r}")
+
+ batch = self.retrieve(message_batch_id=message_batch_id)
+ if not batch.results_url:
+ raise AnthropicError(
+ f"No `results_url` for the given batch; Has it finished processing? {batch.processing_status}"
+ )
+
+ extra_headers = {"Accept": "application/binary", **(extra_headers or {})}
+ return self._get(
+ batch.results_url,
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ stream=True,
+ cast_to=JSONLDecoder[MessageBatchIndividualResponse],
+ )
+
+
+class AsyncBatches(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncBatchesWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/anthropics/anthropic-sdk-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncBatchesWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncBatchesWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/anthropics/anthropic-sdk-python#with_streaming_response
+ """
+ return AsyncBatchesWithStreamingResponse(self)
+
+ async def create(
+ self,
+ *,
+ requests: Iterable[batch_create_params.Request],
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> MessageBatch:
+ """
+ Send a batch of Message creation requests.
+
+ The Message Batches API can be used to process multiple Messages API requests at
+ once. Once a Message Batch is created, it begins processing immediately. Batches
+ can take up to 24 hours to complete.
+
+ Args:
+ requests: List of requests for prompt completion. Each is an individual request to create
+ a Message.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return await self._post(
+ "/v1/messages/batches",
+ body=await async_maybe_transform({"requests": requests}, batch_create_params.BatchCreateParams),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=MessageBatch,
+ )
+
+ async def retrieve(
+ self,
+ message_batch_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> MessageBatch:
+ """This endpoint is idempotent and can be used to poll for Message Batch
+ completion.
+
+ To access the results of a Message Batch, make a request to the
+ `results_url` field in the response.
+
+ Args:
+ message_batch_id: ID of the Message Batch.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not message_batch_id:
+ raise ValueError(f"Expected a non-empty value for `message_batch_id` but received {message_batch_id!r}")
+ return await self._get(
+ f"/v1/messages/batches/{message_batch_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=MessageBatch,
+ )
+
+ def list(
+ self,
+ *,
+ after_id: str | NotGiven = NOT_GIVEN,
+ before_id: str | NotGiven = NOT_GIVEN,
+ limit: int | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AsyncPaginator[MessageBatch, AsyncPage[MessageBatch]]:
+ """List all Message Batches within a Workspace.
+
+ Most recently created batches are
+ returned first.
+
+ Args:
+ after_id: ID of the object to use as a cursor for pagination. When provided, returns the
+ page of results immediately after this object.
+
+ before_id: ID of the object to use as a cursor for pagination. When provided, returns the
+ page of results immediately before this object.
+
+ limit: Number of items to return per page.
+
+ Defaults to `20`. Ranges from `1` to `1000`.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return self._get_api_list(
+ "/v1/messages/batches",
+ page=AsyncPage[MessageBatch],
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "after_id": after_id,
+ "before_id": before_id,
+ "limit": limit,
+ },
+ batch_list_params.BatchListParams,
+ ),
+ ),
+ model=MessageBatch,
+ )
+
+ async def cancel(
+ self,
+ message_batch_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> MessageBatch:
+ """Batches may be canceled any time before processing ends.
+
+ Once cancellation is
+ initiated, the batch enters a `canceling` state, at which time the system may
+ complete any in-progress, non-interruptible requests before finalizing
+ cancellation.
+
+ The number of canceled requests is specified in `request_counts`. To determine
+ which requests were canceled, check the individual results within the batch.
+ Note that cancellation may not result in any canceled requests if they were
+ non-interruptible.
+
+ Args:
+ message_batch_id: ID of the Message Batch.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not message_batch_id:
+ raise ValueError(f"Expected a non-empty value for `message_batch_id` but received {message_batch_id!r}")
+ return await self._post(
+ f"/v1/messages/batches/{message_batch_id}/cancel",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=MessageBatch,
+ )
+
+ async def results(
+ self,
+ message_batch_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AsyncJSONLDecoder[MessageBatchIndividualResponse]:
+ """
+ Streams the results of a Message Batch as a `.jsonl` file.
+
+ Each line in the file is a JSON object containing the result of a single request
+ in the Message Batch. Results are not guaranteed to be in the same order as
+ requests. Use the `custom_id` field to match results to requests.
+
+ Args:
+ message_batch_id: ID of the Message Batch.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not message_batch_id:
+ raise ValueError(f"Expected a non-empty value for `message_batch_id` but received {message_batch_id!r}")
+
+ batch = await self.retrieve(message_batch_id=message_batch_id)
+ if not batch.results_url:
+ raise AnthropicError(
+ f"No `results_url` for the given batch; Has it finished processing? {batch.processing_status}"
+ )
+
+ extra_headers = {"Accept": "application/binary", **(extra_headers or {})}
+ return await self._get(
+ batch.results_url,
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ stream=True,
+ cast_to=AsyncJSONLDecoder[MessageBatchIndividualResponse],
+ )
+
+
+class BatchesWithRawResponse:
+ def __init__(self, batches: Batches) -> None:
+ self._batches = batches
+
+ self.create = _legacy_response.to_raw_response_wrapper(
+ batches.create,
+ )
+ self.retrieve = _legacy_response.to_raw_response_wrapper(
+ batches.retrieve,
+ )
+ self.list = _legacy_response.to_raw_response_wrapper(
+ batches.list,
+ )
+ self.cancel = _legacy_response.to_raw_response_wrapper(
+ batches.cancel,
+ )
+ self.results = to_custom_raw_response_wrapper(
+ batches.results,
+ BinaryAPIResponse,
+ )
+
+
+class AsyncBatchesWithRawResponse:
+ def __init__(self, batches: AsyncBatches) -> None:
+ self._batches = batches
+
+ self.create = _legacy_response.async_to_raw_response_wrapper(
+ batches.create,
+ )
+ self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+ batches.retrieve,
+ )
+ self.list = _legacy_response.async_to_raw_response_wrapper(
+ batches.list,
+ )
+ self.cancel = _legacy_response.async_to_raw_response_wrapper(
+ batches.cancel,
+ )
+ self.results = async_to_custom_raw_response_wrapper(
+ batches.results,
+ AsyncBinaryAPIResponse,
+ )
+
+
+class BatchesWithStreamingResponse:
+ def __init__(self, batches: Batches) -> None:
+ self._batches = batches
+
+ self.create = to_streamed_response_wrapper(
+ batches.create,
+ )
+ self.retrieve = to_streamed_response_wrapper(
+ batches.retrieve,
+ )
+ self.list = to_streamed_response_wrapper(
+ batches.list,
+ )
+ self.cancel = to_streamed_response_wrapper(
+ batches.cancel,
+ )
+ self.results = to_custom_streamed_response_wrapper(
+ batches.results,
+ StreamedBinaryAPIResponse,
+ )
+
+
+class AsyncBatchesWithStreamingResponse:
+ def __init__(self, batches: AsyncBatches) -> None:
+ self._batches = batches
+
+ self.create = async_to_streamed_response_wrapper(
+ batches.create,
+ )
+ self.retrieve = async_to_streamed_response_wrapper(
+ batches.retrieve,
+ )
+ self.list = async_to_streamed_response_wrapper(
+ batches.list,
+ )
+ self.cancel = async_to_streamed_response_wrapper(
+ batches.cancel,
+ )
+ self.results = async_to_custom_streamed_response_wrapper(
+ batches.results,
+ AsyncStreamedBinaryAPIResponse,
+ )
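Putting the new `Batches` resource together, a rough end-to-end sketch of the synchronous flow might look like the following. The model ID, prompts, and 30-second polling interval are placeholders; the per-request shape (`custom_id` plus `params`), the `"in_progress"` status value, the batch's `id` attribute, and iterating the returned `JSONLDecoder` are assumptions drawn from the public Message Batches API rather than from code shown in this diff.

```python
import time

from anthropic import Anthropic

client = Anthropic()  # reads ANTHROPIC_API_KEY from the environment

# Queue two independent Messages API requests as one batch.
batch = client.messages.batches.create(
    requests=[
        {
            "custom_id": "greeting",
            "params": {
                "model": "claude-3-5-sonnet-20241022",  # placeholder model ID
                "max_tokens": 64,
                "messages": [{"role": "user", "content": "Hello, Claude"}],
            },
        },
        {
            "custom_id": "farewell",
            "params": {
                "model": "claude-3-5-sonnet-20241022",
                "max_tokens": 64,
                "messages": [{"role": "user", "content": "Say goodbye politely."}],
            },
        },
    ],
)

# `retrieve` is idempotent, so it can be polled until processing ends.
while batch.processing_status == "in_progress":  # status value assumed from the API docs
    time.sleep(30)
    batch = client.messages.batches.retrieve(batch.id)

# `results` streams the `.jsonl` output; entries are decoded one per line and are
# not ordered, so match them back to requests via `custom_id`.
for entry in client.messages.batches.results(batch.id):
    print(entry.custom_id, entry.result)
```

The async resource mirrors the same flow with `await`, and the `*WithRawResponse` and `*WithStreamingResponse` wrappers defined above expose the same methods when raw headers or lazy reads are needed.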
diff --git a/src/anthropic/resources/messages.py b/src/anthropic/resources/messages/messages.py
similarity index 80%
rename from src/anthropic/resources/messages.py
rename to src/anthropic/resources/messages/messages.py
index b5230807..88cc8605 100644
--- a/src/anthropic/resources/messages.py
+++ b/src/anthropic/resources/messages/messages.py
@@ -9,30 +9,39 @@
import httpx
-from .. import _legacy_response
-from ..types import message_create_params
-from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from .._utils import (
+from ... import _legacy_response
+from ...types import message_create_params, message_count_tokens_params
+from .batches import (
+ Batches,
+ AsyncBatches,
+ BatchesWithRawResponse,
+ AsyncBatchesWithRawResponse,
+ BatchesWithStreamingResponse,
+ AsyncBatchesWithStreamingResponse,
+)
+from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ..._utils import (
is_given,
required_args,
maybe_transform,
async_maybe_transform,
)
-from .._compat import cached_property
-from .._resource import SyncAPIResource, AsyncAPIResource
-from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
-from .._constants import DEFAULT_TIMEOUT
-from .._streaming import Stream, AsyncStream
-from .._base_client import make_request_options
-from ..lib.streaming import MessageStreamManager, AsyncMessageStreamManager
-from ..types.message import Message
-from ..types.tool_param import ToolParam
-from ..types.model_param import ModelParam
-from ..types.message_param import MessageParam
-from ..types.metadata_param import MetadataParam
-from ..types.text_block_param import TextBlockParam
-from ..types.tool_choice_param import ToolChoiceParam
-from ..types.raw_message_stream_event import RawMessageStreamEvent
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ..._constants import DEFAULT_TIMEOUT
+from ..._streaming import Stream, AsyncStream
+from ..._base_client import make_request_options
+from ...lib.streaming import MessageStreamManager, AsyncMessageStreamManager
+from ...types.message import Message
+from ...types.tool_param import ToolParam
+from ...types.model_param import ModelParam
+from ...types.message_param import MessageParam
+from ...types.metadata_param import MetadataParam
+from ...types.text_block_param import TextBlockParam
+from ...types.tool_choice_param import ToolChoiceParam
+from ...types.message_tokens_count import MessageTokensCount
+from ...types.raw_message_stream_event import RawMessageStreamEvent
__all__ = ["Messages", "AsyncMessages"]
@@ -47,6 +56,10 @@
class Messages(SyncAPIResource):
+ @cached_property
+ def batches(self) -> Batches:
+ return Batches(self._client)
+
@cached_property
def with_raw_response(self) -> MessagesWithRawResponse:
"""
@@ -974,8 +987,229 @@ def stream(
)
return MessageStreamManager(make_request)
+ def count_tokens(
+ self,
+ *,
+ messages: Iterable[MessageParam],
+ model: ModelParam,
+ system: Union[str, Iterable[TextBlockParam]] | NotGiven = NOT_GIVEN,
+ tool_choice: ToolChoiceParam | NotGiven = NOT_GIVEN,
+ tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> MessageTokensCount:
+ """
+ Count the number of tokens in a Message.
+
+ The Token Count API can be used to count the number of tokens in a Message,
+ including tools, images, and documents, without creating it.
+
+ Args:
+ messages: Input messages.
+
+ Our models are trained to operate on alternating `user` and `assistant`
+ conversational turns. When creating a new `Message`, you specify the prior
+ conversational turns with the `messages` parameter, and the model then generates
+ the next `Message` in the conversation. Consecutive `user` or `assistant` turns
+ in your request will be combined into a single turn.
+
+ Each input message must be an object with a `role` and `content`. You can
+ specify a single `user`-role message, or you can include multiple `user` and
+ `assistant` messages.
+
+ If the final message uses the `assistant` role, the response content will
+ continue immediately from the content in that message. This can be used to
+ constrain part of the model's response.
+
+ Example with a single `user` message:
+
+ ```json
+ [{ "role": "user", "content": "Hello, Claude" }]
+ ```
+
+ Example with multiple conversational turns:
+
+ ```json
+ [
+ { "role": "user", "content": "Hello there." },
+ { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
+ { "role": "user", "content": "Can you explain LLMs in plain English?" }
+ ]
+ ```
+
+ Example with a partially-filled response from Claude:
+
+ ```json
+ [
+ {
+ "role": "user",
+ "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
+ },
+ { "role": "assistant", "content": "The best answer is (" }
+ ]
+ ```
+
+ Each input message `content` may be either a single `string` or an array of
+ content blocks, where each block has a specific `type`. Using a `string` for
+ `content` is shorthand for an array of one content block of type `"text"`. The
+ following input messages are equivalent:
+
+ ```json
+ { "role": "user", "content": "Hello, Claude" }
+ ```
+
+ ```json
+ { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
+ ```
+
+ Starting with Claude 3 models, you can also send image content blocks:
+
+ ```json
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "image",
+ "source": {
+ "type": "base64",
+ "media_type": "image/jpeg",
+ "data": "/9j/4AAQSkZJRg..."
+ }
+ },
+ { "type": "text", "text": "What is in this image?" }
+ ]
+ }
+ ```
+
+ We currently support the `base64` source type for images, and the `image/jpeg`,
+ `image/png`, `image/gif`, and `image/webp` media types.
+
+ See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for
+ more input examples.
+
+ Note that if you want to include a
+ [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use
+ the top-level `system` parameter — there is no `"system"` role for input
+ messages in the Messages API.
+
+ model: The model that will complete your prompt. See
+ [models](https://docs.anthropic.com/en/docs/models-overview) for additional
+ details and options.
+
+ system: System prompt.
+
+ A system prompt is a way of providing context and instructions to Claude, such
+ as specifying a particular goal or role. See our
+ [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts).
+
+ tool_choice: How the model should use the provided tools. The model can use a specific tool,
+ any available tool, or decide by itself.
+
+ tools: Definitions of tools that the model may use.
+
+ If you include `tools` in your API request, the model may return `tool_use`
+ content blocks that represent the model's use of those tools. You can then run
+ those tools using the tool input generated by the model and then optionally
+ return results back to the model using `tool_result` content blocks.
+
+ Each tool definition includes:
+
+ - `name`: Name of the tool.
+ - `description`: Optional, but strongly-recommended description of the tool.
+ - `input_schema`: [JSON schema](https://json-schema.org/) for the tool `input`
+ shape that the model will produce in `tool_use` output content blocks.
+
+ For example, if you defined `tools` as:
+
+ ```json
+ [
+ {
+ "name": "get_stock_price",
+ "description": "Get the current stock price for a given ticker symbol.",
+ "input_schema": {
+ "type": "object",
+ "properties": {
+ "ticker": {
+ "type": "string",
+ "description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
+ }
+ },
+ "required": ["ticker"]
+ }
+ }
+ ]
+ ```
+
+ And then asked the model "What's the S&P 500 at today?", the model might produce
+ `tool_use` content blocks in the response like this:
+
+ ```json
+ [
+ {
+ "type": "tool_use",
+ "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+ "name": "get_stock_price",
+ "input": { "ticker": "^GSPC" }
+ }
+ ]
+ ```
+
+ You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an
+ input, and return the following back to the model in a subsequent `user`
+ message:
+
+ ```json
+ [
+ {
+ "type": "tool_result",
+ "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+ "content": "259.75 USD"
+ }
+ ]
+ ```
+
+ Tools can be used for workflows that include running client-side tools and
+ functions, or more generally whenever you want the model to produce a particular
+ JSON structure of output.
+
+ See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return self._post(
+ "/v1/messages/count_tokens",
+ body=maybe_transform(
+ {
+ "messages": messages,
+ "model": model,
+ "system": system,
+ "tool_choice": tool_choice,
+ "tools": tools,
+ },
+ message_count_tokens_params.MessageCountTokensParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=MessageTokensCount,
+ )
+
class AsyncMessages(AsyncAPIResource):
+ @cached_property
+ def batches(self) -> AsyncBatches:
+ return AsyncBatches(self._client)
+
@cached_property
def with_raw_response(self) -> AsyncMessagesWithRawResponse:
"""
@@ -1902,6 +2136,223 @@ def stream(
)
return AsyncMessageStreamManager(request)
+ async def count_tokens(
+ self,
+ *,
+ messages: Iterable[MessageParam],
+ model: ModelParam,
+ system: Union[str, Iterable[TextBlockParam]] | NotGiven = NOT_GIVEN,
+ tool_choice: ToolChoiceParam | NotGiven = NOT_GIVEN,
+ tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> MessageTokensCount:
+ """
+ Count the number of tokens in a Message.
+
+ The Token Count API can be used to count the number of tokens in a Message,
+ including tools, images, and documents, without creating it.
+
+ Args:
+ messages: Input messages.
+
+ Our models are trained to operate on alternating `user` and `assistant`
+ conversational turns. When creating a new `Message`, you specify the prior
+ conversational turns with the `messages` parameter, and the model then generates
+ the next `Message` in the conversation. Consecutive `user` or `assistant` turns
+ in your request will be combined into a single turn.
+
+ Each input message must be an object with a `role` and `content`. You can
+ specify a single `user`-role message, or you can include multiple `user` and
+ `assistant` messages.
+
+ If the final message uses the `assistant` role, the response content will
+ continue immediately from the content in that message. This can be used to
+ constrain part of the model's response.
+
+ Example with a single `user` message:
+
+ ```json
+ [{ "role": "user", "content": "Hello, Claude" }]
+ ```
+
+ Example with multiple conversational turns:
+
+ ```json
+ [
+ { "role": "user", "content": "Hello there." },
+ { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
+ { "role": "user", "content": "Can you explain LLMs in plain English?" }
+ ]
+ ```
+
+ Example with a partially-filled response from Claude:
+
+ ```json
+ [
+ {
+ "role": "user",
+ "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
+ },
+ { "role": "assistant", "content": "The best answer is (" }
+ ]
+ ```
+
+ Each input message `content` may be either a single `string` or an array of
+ content blocks, where each block has a specific `type`. Using a `string` for
+ `content` is shorthand for an array of one content block of type `"text"`. The
+ following input messages are equivalent:
+
+ ```json
+ { "role": "user", "content": "Hello, Claude" }
+ ```
+
+ ```json
+ { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
+ ```
+
+ Starting with Claude 3 models, you can also send image content blocks:
+
+ ```json
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "image",
+ "source": {
+ "type": "base64",
+ "media_type": "image/jpeg",
+ "data": "/9j/4AAQSkZJRg..."
+ }
+ },
+ { "type": "text", "text": "What is in this image?" }
+ ]
+ }
+ ```
+
+ We currently support the `base64` source type for images, and the `image/jpeg`,
+ `image/png`, `image/gif`, and `image/webp` media types.
+
+ See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for
+ more input examples.
+
+ Note that if you want to include a
+ [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use
+ the top-level `system` parameter — there is no `"system"` role for input
+ messages in the Messages API.
+
+ model: The model that will complete your prompt. See
+ [models](https://docs.anthropic.com/en/docs/models-overview) for additional
+ details and options.
+
+ system: System prompt.
+
+ A system prompt is a way of providing context and instructions to Claude, such
+ as specifying a particular goal or role. See our
+ [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts).
+
+ tool_choice: How the model should use the provided tools. The model can use a specific tool,
+ any available tool, or decide by itself.
+
+ tools: Definitions of tools that the model may use.
+
+ If you include `tools` in your API request, the model may return `tool_use`
+ content blocks that represent the model's use of those tools. You can then run
+ those tools using the tool input generated by the model and then optionally
+ return results back to the model using `tool_result` content blocks.
+
+ Each tool definition includes:
+
+ - `name`: Name of the tool.
+ - `description`: Optional, but strongly-recommended description of the tool.
+ - `input_schema`: [JSON schema](https://json-schema.org/) for the tool `input`
+ shape that the model will produce in `tool_use` output content blocks.
+
+ For example, if you defined `tools` as:
+
+ ```json
+ [
+ {
+ "name": "get_stock_price",
+ "description": "Get the current stock price for a given ticker symbol.",
+ "input_schema": {
+ "type": "object",
+ "properties": {
+ "ticker": {
+ "type": "string",
+ "description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
+ }
+ },
+ "required": ["ticker"]
+ }
+ }
+ ]
+ ```
+
+ And then asked the model "What's the S&P 500 at today?", the model might produce
+ `tool_use` content blocks in the response like this:
+
+ ```json
+ [
+ {
+ "type": "tool_use",
+ "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+ "name": "get_stock_price",
+ "input": { "ticker": "^GSPC" }
+ }
+ ]
+ ```
+
+ You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an
+ input, and return the following back to the model in a subsequent `user`
+ message:
+
+ ```json
+ [
+ {
+ "type": "tool_result",
+ "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+ "content": "259.75 USD"
+ }
+ ]
+ ```
+
+ Tools can be used for workflows that include running client-side tools and
+ functions, or more generally whenever you want the model to produce a particular
+ JSON structure of output.
+
+ See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return await self._post(
+ "/v1/messages/count_tokens",
+ body=await async_maybe_transform(
+ {
+ "messages": messages,
+ "model": model,
+ "system": system,
+ "tool_choice": tool_choice,
+ "tools": tools,
+ },
+ message_count_tokens_params.MessageCountTokensParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=MessageTokensCount,
+ )
+
class MessagesWithRawResponse:
def __init__(self, messages: Messages) -> None:
@@ -1910,6 +2361,13 @@ def __init__(self, messages: Messages) -> None:
self.create = _legacy_response.to_raw_response_wrapper(
messages.create,
)
+ self.count_tokens = _legacy_response.to_raw_response_wrapper(
+ messages.count_tokens,
+ )
+
+ @cached_property
+ def batches(self) -> BatchesWithRawResponse:
+ return BatchesWithRawResponse(self._messages.batches)
class AsyncMessagesWithRawResponse:
@@ -1919,6 +2377,13 @@ def __init__(self, messages: AsyncMessages) -> None:
self.create = _legacy_response.async_to_raw_response_wrapper(
messages.create,
)
+ self.count_tokens = _legacy_response.async_to_raw_response_wrapper(
+ messages.count_tokens,
+ )
+
+ @cached_property
+ def batches(self) -> AsyncBatchesWithRawResponse:
+ return AsyncBatchesWithRawResponse(self._messages.batches)
class MessagesWithStreamingResponse:
@@ -1928,6 +2393,13 @@ def __init__(self, messages: Messages) -> None:
self.create = to_streamed_response_wrapper(
messages.create,
)
+ self.count_tokens = to_streamed_response_wrapper(
+ messages.count_tokens,
+ )
+
+ @cached_property
+ def batches(self) -> BatchesWithStreamingResponse:
+ return BatchesWithStreamingResponse(self._messages.batches)
class AsyncMessagesWithStreamingResponse:
@@ -1937,3 +2409,10 @@ def __init__(self, messages: AsyncMessages) -> None:
self.create = async_to_streamed_response_wrapper(
messages.create,
)
+ self.count_tokens = async_to_streamed_response_wrapper(
+ messages.count_tokens,
+ )
+
+ @cached_property
+ def batches(self) -> AsyncBatchesWithStreamingResponse:
+ return AsyncBatchesWithStreamingResponse(self._messages.batches)
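With `count_tokens` now wired into both `Messages` and `AsyncMessages`, counting tokens against `/v1/messages/count_tokens` looks roughly like the sketch below. The model ID is a placeholder, and the `input_tokens` attribute is an assumption about `MessageTokensCount`, which this diff imports but does not define here.

```python
from anthropic import Anthropic

client = Anthropic()

count = client.messages.count_tokens(
    model="claude-3-5-sonnet-20241022",  # placeholder model ID
    system="You are a terse assistant.",
    messages=[{"role": "user", "content": "How many tokens is this request?"}],
)

# MessageTokensCount is assumed to expose an `input_tokens` field.
print(count.input_tokens)
```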
diff --git a/src/anthropic/resources/models.py b/src/anthropic/resources/models.py
new file mode 100644
index 00000000..aec102bf
--- /dev/null
+++ b/src/anthropic/resources/models.py
@@ -0,0 +1,300 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import httpx
+
+from .. import _legacy_response
+from ..types import model_list_params
+from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from .._utils import maybe_transform
+from .._compat import cached_property
+from .._resource import SyncAPIResource, AsyncAPIResource
+from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ..pagination import SyncPage, AsyncPage
+from .._base_client import AsyncPaginator, make_request_options
+from ..types.model_info import ModelInfo
+
+__all__ = ["Models", "AsyncModels"]
+
+
+class Models(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> ModelsWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/anthropics/anthropic-sdk-python#accessing-raw-response-data-eg-headers
+ """
+ return ModelsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> ModelsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/anthropics/anthropic-sdk-python#with_streaming_response
+ """
+ return ModelsWithStreamingResponse(self)
+
+ def retrieve(
+ self,
+ model_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> ModelInfo:
+ """
+ Get a specific model.
+
+ The Models API response can be used to determine information about a specific
+ model or resolve a model alias to a model ID.
+
+ Args:
+ model_id: Model identifier or alias.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not model_id:
+ raise ValueError(f"Expected a non-empty value for `model_id` but received {model_id!r}")
+ return self._get(
+ f"/v1/models/{model_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=ModelInfo,
+ )
+
+ def list(
+ self,
+ *,
+ after_id: str | NotGiven = NOT_GIVEN,
+ before_id: str | NotGiven = NOT_GIVEN,
+ limit: int | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> SyncPage[ModelInfo]:
+ """
+ List available models.
+
+ The Models API response can be used to determine which models are available for
+ use in the API. More recently released models are listed first.
+
+ Args:
+ after_id: ID of the object to use as a cursor for pagination. When provided, returns the
+ page of results immediately after this object.
+
+ before_id: ID of the object to use as a cursor for pagination. When provided, returns the
+ page of results immediately before this object.
+
+ limit: Number of items to return per page.
+
+ Defaults to `20`. Ranges from `1` to `1000`.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return self._get_api_list(
+ "/v1/models",
+ page=SyncPage[ModelInfo],
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "after_id": after_id,
+ "before_id": before_id,
+ "limit": limit,
+ },
+ model_list_params.ModelListParams,
+ ),
+ ),
+ model=ModelInfo,
+ )
+
+
+class AsyncModels(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncModelsWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/anthropics/anthropic-sdk-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncModelsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncModelsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/anthropics/anthropic-sdk-python#with_streaming_response
+ """
+ return AsyncModelsWithStreamingResponse(self)
+
+ async def retrieve(
+ self,
+ model_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> ModelInfo:
+ """
+ Get a specific model.
+
+ The Models API response can be used to determine information about a specific
+ model or resolve a model alias to a model ID.
+
+ Args:
+ model_id: Model identifier or alias.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not model_id:
+ raise ValueError(f"Expected a non-empty value for `model_id` but received {model_id!r}")
+ return await self._get(
+ f"/v1/models/{model_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=ModelInfo,
+ )
+
+ def list(
+ self,
+ *,
+ after_id: str | NotGiven = NOT_GIVEN,
+ before_id: str | NotGiven = NOT_GIVEN,
+ limit: int | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AsyncPaginator[ModelInfo, AsyncPage[ModelInfo]]:
+ """
+ List available models.
+
+ The Models API response can be used to determine which models are available for
+ use in the API. More recently released models are listed first.
+
+ Args:
+ after_id: ID of the object to use as a cursor for pagination. When provided, returns the
+ page of results immediately after this object.
+
+ before_id: ID of the object to use as a cursor for pagination. When provided, returns the
+ page of results immediately before this object.
+
+ limit: Number of items to return per page.
+
+ Defaults to `20`. Ranges from `1` to `1000`.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return self._get_api_list(
+ "/v1/models",
+ page=AsyncPage[ModelInfo],
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "after_id": after_id,
+ "before_id": before_id,
+ "limit": limit,
+ },
+ model_list_params.ModelListParams,
+ ),
+ ),
+ model=ModelInfo,
+ )
+
+
+class ModelsWithRawResponse:
+ def __init__(self, models: Models) -> None:
+ self._models = models
+
+ self.retrieve = _legacy_response.to_raw_response_wrapper(
+ models.retrieve,
+ )
+ self.list = _legacy_response.to_raw_response_wrapper(
+ models.list,
+ )
+
+
+class AsyncModelsWithRawResponse:
+ def __init__(self, models: AsyncModels) -> None:
+ self._models = models
+
+ self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+ models.retrieve,
+ )
+ self.list = _legacy_response.async_to_raw_response_wrapper(
+ models.list,
+ )
+
+
+class ModelsWithStreamingResponse:
+ def __init__(self, models: Models) -> None:
+ self._models = models
+
+ self.retrieve = to_streamed_response_wrapper(
+ models.retrieve,
+ )
+ self.list = to_streamed_response_wrapper(
+ models.list,
+ )
+
+
+class AsyncModelsWithStreamingResponse:
+ def __init__(self, models: AsyncModels) -> None:
+ self._models = models
+
+ self.retrieve = async_to_streamed_response_wrapper(
+ models.retrieve,
+ )
+ self.list = async_to_streamed_response_wrapper(
+ models.list,
+ )
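Assuming the client wires this new resource up as `client.models`, it could be exercised as below. The printed field names (`id`, `display_name`, `created_at`) mirror the `BetaModelInfo` model added later in this diff and are assumed to match `ModelInfo`; the alias passed to `retrieve` is a placeholder.

```python
from anthropic import Anthropic

client = Anthropic()

# Most recently released models are listed first; the returned SyncPage is
# iterable and is assumed to fetch follow-up pages as needed.
for model in client.models.list(limit=20):
    print(model.id, model.display_name)

# Resolve an alias (placeholder value) to a concrete model ID.
info = client.models.retrieve("claude-3-5-sonnet-latest")
print(info.id, info.created_at)
```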
diff --git a/src/anthropic/types/__init__.py b/src/anthropic/types/__init__.py
index 0125a215..a880b827 100644
--- a/src/anthropic/types/__init__.py
+++ b/src/anthropic/types/__init__.py
@@ -4,9 +4,23 @@
from .model import Model as Model
from .usage import Usage as Usage
+from .shared import (
+ ErrorObject as ErrorObject,
+ BillingError as BillingError,
+ ErrorResponse as ErrorResponse,
+ NotFoundError as NotFoundError,
+ APIErrorObject as APIErrorObject,
+ RateLimitError as RateLimitError,
+ OverloadedError as OverloadedError,
+ PermissionError as PermissionError,
+ AuthenticationError as AuthenticationError,
+ GatewayTimeoutError as GatewayTimeoutError,
+ InvalidRequestError as InvalidRequestError,
+)
from .message import Message as Message
from .beta_error import BetaError as BetaError
from .completion import Completion as Completion
+from .model_info import ModelInfo as ModelInfo
from .text_block import TextBlock as TextBlock
from .text_delta import TextDelta as TextDelta
from .tool_param import ToolParam as ToolParam
@@ -19,7 +33,9 @@
from .input_json_delta import InputJSONDelta as InputJSONDelta
from .text_block_param import TextBlockParam as TextBlockParam
from .image_block_param import ImageBlockParam as ImageBlockParam
+from .model_list_params import ModelListParams as ModelListParams
from .tool_choice_param import ToolChoiceParam as ToolChoiceParam
+from .beta_billing_error import BetaBillingError as BetaBillingError
from .message_stop_event import MessageStopEvent as MessageStopEvent
from .beta_error_response import BetaErrorResponse as BetaErrorResponse
from .content_block_param import ContentBlockParam as ContentBlockParam
@@ -28,7 +44,9 @@
from .message_start_event import MessageStartEvent as MessageStartEvent
from .anthropic_beta_param import AnthropicBetaParam as AnthropicBetaParam
from .beta_not_found_error import BetaNotFoundError as BetaNotFoundError
+from .document_block_param import DocumentBlockParam as DocumentBlockParam
from .message_stream_event import MessageStreamEvent as MessageStreamEvent
+from .message_tokens_count import MessageTokensCount as MessageTokensCount
from .tool_use_block_param import ToolUseBlockParam as ToolUseBlockParam
from .beta_overloaded_error import BetaOverloadedError as BetaOverloadedError
from .beta_permission_error import BetaPermissionError as BetaPermissionError
@@ -38,6 +56,7 @@
from .raw_message_stop_event import RawMessageStopEvent as RawMessageStopEvent
from .tool_choice_auto_param import ToolChoiceAutoParam as ToolChoiceAutoParam
from .tool_choice_tool_param import ToolChoiceToolParam as ToolChoiceToolParam
+from .base64_pdf_source_param import Base64PDFSourceParam as Base64PDFSourceParam
from .raw_message_delta_event import RawMessageDeltaEvent as RawMessageDeltaEvent
from .raw_message_start_event import RawMessageStartEvent as RawMessageStartEvent
from .tool_result_block_param import ToolResultBlockParam as ToolResultBlockParam
@@ -47,7 +66,10 @@
from .beta_authentication_error import BetaAuthenticationError as BetaAuthenticationError
from .content_block_delta_event import ContentBlockDeltaEvent as ContentBlockDeltaEvent
from .content_block_start_event import ContentBlockStartEvent as ContentBlockStartEvent
+from .beta_gateway_timeout_error import BetaGatewayTimeoutError as BetaGatewayTimeoutError
from .beta_invalid_request_error import BetaInvalidRequestError as BetaInvalidRequestError
+from .message_count_tokens_params import MessageCountTokensParams as MessageCountTokensParams
from .raw_content_block_stop_event import RawContentBlockStopEvent as RawContentBlockStopEvent
+from .cache_control_ephemeral_param import CacheControlEphemeralParam as CacheControlEphemeralParam
from .raw_content_block_delta_event import RawContentBlockDeltaEvent as RawContentBlockDeltaEvent
from .raw_content_block_start_event import RawContentBlockStartEvent as RawContentBlockStartEvent
diff --git a/src/anthropic/types/base64_pdf_source_param.py b/src/anthropic/types/base64_pdf_source_param.py
new file mode 100644
index 00000000..ac247a19
--- /dev/null
+++ b/src/anthropic/types/base64_pdf_source_param.py
@@ -0,0 +1,23 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union
+from typing_extensions import Literal, Required, Annotated, TypedDict
+
+from .._types import Base64FileInput
+from .._utils import PropertyInfo
+from .._models import set_pydantic_config
+
+__all__ = ["Base64PDFSourceParam"]
+
+
+class Base64PDFSourceParam(TypedDict, total=False):
+ data: Required[Annotated[Union[str, Base64FileInput], PropertyInfo(format="base64")]]
+
+ media_type: Required[Literal["application/pdf"]]
+
+ type: Required[Literal["base64"]]
+
+
+set_pydantic_config(Base64PDFSourceParam, {"arbitrary_types_allowed": True})
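`Base64PDFSourceParam` pairs a base64 payload with the fixed `application/pdf` media type. A minimal sketch of building one from a local file follows; the filename is a placeholder, and the surrounding `"document"` block shape is an assumption about `DocumentBlockParam`, which this diff imports in `types/__init__.py` but does not define here.

```python
import base64
from pathlib import Path

# Encode a local PDF by hand; `data` also accepts a Base64FileInput.
pdf_b64 = base64.standard_b64encode(Path("report.pdf").read_bytes()).decode()

source = {
    "type": "base64",
    "media_type": "application/pdf",
    "data": pdf_b64,
}

# Assumed document content block shape wrapping the source above.
document_block = {"type": "document", "source": source}
```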
diff --git a/src/anthropic/types/beta/__init__.py b/src/anthropic/types/beta/__init__.py
index cf5fd496..c233d9c7 100644
--- a/src/anthropic/types/beta/__init__.py
+++ b/src/anthropic/types/beta/__init__.py
@@ -4,9 +4,11 @@
from .beta_usage import BetaUsage as BetaUsage
from .beta_message import BetaMessage as BetaMessage
+from .beta_model_info import BetaModelInfo as BetaModelInfo
from .beta_text_block import BetaTextBlock as BetaTextBlock
from .beta_text_delta import BetaTextDelta as BetaTextDelta
from .beta_tool_param import BetaToolParam as BetaToolParam
+from .model_list_params import ModelListParams as ModelListParams
from .beta_content_block import BetaContentBlock as BetaContentBlock
from .beta_message_param import BetaMessageParam as BetaMessageParam
from .beta_metadata_param import BetaMetadataParam as BetaMetadataParam
diff --git a/src/anthropic/types/beta/beta_model_info.py b/src/anthropic/types/beta/beta_model_info.py
new file mode 100644
index 00000000..6ea50d9f
--- /dev/null
+++ b/src/anthropic/types/beta/beta_model_info.py
@@ -0,0 +1,28 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from datetime import datetime
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["BetaModelInfo"]
+
+
+class BetaModelInfo(BaseModel):
+ id: str
+ """Unique model identifier."""
+
+ created_at: datetime
+ """RFC 3339 datetime string representing the time at which the model was released.
+
+ May be set to an epoch value if the release date is unknown.
+ """
+
+ display_name: str
+ """A human-readable name for the model."""
+
+ type: Literal["model"]
+ """Object type.
+
+ For Models, this is always `"model"`.
+ """
diff --git a/src/anthropic/types/beta/beta_raw_content_block_delta_event.py b/src/anthropic/types/beta/beta_raw_content_block_delta_event.py
index 9e26688c..03ce6557 100644
--- a/src/anthropic/types/beta/beta_raw_content_block_delta_event.py
+++ b/src/anthropic/types/beta/beta_raw_content_block_delta_event.py
@@ -1,16 +1,15 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from typing import Union
-from typing_extensions import Literal, Annotated, TypeAlias
+from typing_extensions import Literal, TypeAlias
-from ..._utils import PropertyInfo
from ..._models import BaseModel
from .beta_text_delta import BetaTextDelta
from .beta_input_json_delta import BetaInputJSONDelta
__all__ = ["BetaRawContentBlockDeltaEvent", "Delta"]
-Delta: TypeAlias = Annotated[Union[BetaTextDelta, BetaInputJSONDelta], PropertyInfo(discriminator="type")]
+Delta: TypeAlias = Union[BetaTextDelta, BetaInputJSONDelta]
class BetaRawContentBlockDeltaEvent(BaseModel):
diff --git a/src/anthropic/types/beta/messages/batch_list_params.py b/src/anthropic/types/beta/messages/batch_list_params.py
index b75cd931..3f406251 100644
--- a/src/anthropic/types/beta/messages/batch_list_params.py
+++ b/src/anthropic/types/beta/messages/batch_list_params.py
@@ -27,7 +27,7 @@ class BatchListParams(TypedDict, total=False):
limit: int
"""Number of items to return per page.
- Defaults to `20`. Ranges from `1` to `100`.
+ Defaults to `20`. Ranges from `1` to `1000`.
"""
betas: Annotated[List[AnthropicBetaParam], PropertyInfo(alias="anthropic-beta")]
diff --git a/src/anthropic/types/beta/model_list_params.py b/src/anthropic/types/beta/model_list_params.py
new file mode 100644
index 00000000..b16d22a3
--- /dev/null
+++ b/src/anthropic/types/beta/model_list_params.py
@@ -0,0 +1,27 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import TypedDict
+
+__all__ = ["ModelListParams"]
+
+
+class ModelListParams(TypedDict, total=False):
+ after_id: str
+ """ID of the object to use as a cursor for pagination.
+
+ When provided, returns the page of results immediately after this object.
+ """
+
+ before_id: str
+ """ID of the object to use as a cursor for pagination.
+
+ When provided, returns the page of results immediately before this object.
+ """
+
+ limit: int
+ """Number of items to return per page.
+
+ Defaults to `20`. Ranges from `1` to `1000`.
+ """
diff --git a/src/anthropic/types/beta/prompt_caching/__init__.py b/src/anthropic/types/beta/prompt_caching/__init__.py
deleted file mode 100644
index 3b4004fc..00000000
--- a/src/anthropic/types/beta/prompt_caching/__init__.py
+++ /dev/null
@@ -1,26 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from .message_create_params import MessageCreateParams as MessageCreateParams
-from .prompt_caching_beta_usage import PromptCachingBetaUsage as PromptCachingBetaUsage
-from .prompt_caching_beta_message import PromptCachingBetaMessage as PromptCachingBetaMessage
-from .prompt_caching_beta_tool_param import PromptCachingBetaToolParam as PromptCachingBetaToolParam
-from .prompt_caching_beta_message_param import PromptCachingBetaMessageParam as PromptCachingBetaMessageParam
-from .prompt_caching_beta_text_block_param import PromptCachingBetaTextBlockParam as PromptCachingBetaTextBlockParam
-from .prompt_caching_beta_image_block_param import PromptCachingBetaImageBlockParam as PromptCachingBetaImageBlockParam
-from .prompt_caching_beta_tool_use_block_param import (
- PromptCachingBetaToolUseBlockParam as PromptCachingBetaToolUseBlockParam,
-)
-from .prompt_caching_beta_tool_result_block_param import (
- PromptCachingBetaToolResultBlockParam as PromptCachingBetaToolResultBlockParam,
-)
-from .raw_prompt_caching_beta_message_start_event import (
- RawPromptCachingBetaMessageStartEvent as RawPromptCachingBetaMessageStartEvent,
-)
-from .raw_prompt_caching_beta_message_stream_event import (
- RawPromptCachingBetaMessageStreamEvent as RawPromptCachingBetaMessageStreamEvent,
-)
-from .prompt_caching_beta_cache_control_ephemeral_param import (
- PromptCachingBetaCacheControlEphemeralParam as PromptCachingBetaCacheControlEphemeralParam,
-)
diff --git a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_image_block_param.py b/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_image_block_param.py
deleted file mode 100644
index 02dfb0bc..00000000
--- a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_image_block_param.py
+++ /dev/null
@@ -1,32 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Union, Optional
-from typing_extensions import Literal, Required, Annotated, TypedDict
-
-from ...._types import Base64FileInput
-from ...._utils import PropertyInfo
-from ...._models import set_pydantic_config
-from .prompt_caching_beta_cache_control_ephemeral_param import PromptCachingBetaCacheControlEphemeralParam
-
-__all__ = ["PromptCachingBetaImageBlockParam", "Source"]
-
-
-class Source(TypedDict, total=False):
- data: Required[Annotated[Union[str, Base64FileInput], PropertyInfo(format="base64")]]
-
- media_type: Required[Literal["image/jpeg", "image/png", "image/gif", "image/webp"]]
-
- type: Required[Literal["base64"]]
-
-
-set_pydantic_config(Source, {"arbitrary_types_allowed": True})
-
-
-class PromptCachingBetaImageBlockParam(TypedDict, total=False):
- source: Required[Source]
-
- type: Required[Literal["image"]]
-
- cache_control: Optional[PromptCachingBetaCacheControlEphemeralParam]
diff --git a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_message.py b/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_message.py
deleted file mode 100644
index 2cc49a2c..00000000
--- a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_message.py
+++ /dev/null
@@ -1,109 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import List, Optional
-from typing_extensions import Literal
-
-from ...model import Model
-from ...._models import BaseModel
-from ...content_block import ContentBlock
-from .prompt_caching_beta_usage import PromptCachingBetaUsage
-
-__all__ = ["PromptCachingBetaMessage"]
-
-
-class PromptCachingBetaMessage(BaseModel):
- id: str
- """Unique object identifier.
-
- The format and length of IDs may change over time.
- """
-
- content: List[ContentBlock]
- """Content generated by the model.
-
- This is an array of content blocks, each of which has a `type` that determines
- its shape.
-
- Example:
-
- ```json
- [{ "type": "text", "text": "Hi, I'm Claude." }]
- ```
-
- If the request input `messages` ended with an `assistant` turn, then the
- response `content` will continue directly from that last turn. You can use this
- to constrain the model's output.
-
- For example, if the input `messages` were:
-
- ```json
- [
- {
- "role": "user",
- "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
- },
- { "role": "assistant", "content": "The best answer is (" }
- ]
- ```
-
- Then the response `content` might be:
-
- ```json
- [{ "type": "text", "text": "B)" }]
- ```
- """
-
- model: Model
- """
- The model that will complete your prompt.\n\nSee
- [models](https://docs.anthropic.com/en/docs/models-overview) for additional
- details and options.
- """
-
- role: Literal["assistant"]
- """Conversational role of the generated message.
-
- This will always be `"assistant"`.
- """
-
- stop_reason: Optional[Literal["end_turn", "max_tokens", "stop_sequence", "tool_use"]] = None
- """The reason that we stopped.
-
- This may be one the following values:
-
- - `"end_turn"`: the model reached a natural stopping point
- - `"max_tokens"`: we exceeded the requested `max_tokens` or the model's maximum
- - `"stop_sequence"`: one of your provided custom `stop_sequences` was generated
- - `"tool_use"`: the model invoked one or more tools
-
- In non-streaming mode this value is always non-null. In streaming mode, it is
- null in the `message_start` event and non-null otherwise.
- """
-
- stop_sequence: Optional[str] = None
- """Which custom stop sequence was generated, if any.
-
- This value will be a non-null string if one of your custom stop sequences was
- generated.
- """
-
- type: Literal["message"]
- """Object type.
-
- For Messages, this is always `"message"`.
- """
-
- usage: PromptCachingBetaUsage
- """Billing and rate-limit usage.
-
- Anthropic's API bills and rate-limits by token counts, as tokens represent the
- underlying cost to our systems.
-
- Under the hood, the API transforms requests into a format suitable for the
- model. The model's output then goes through a parsing stage before becoming an
- API response. As a result, the token counts in `usage` will not match one-to-one
- with the exact visible content of an API request or response.
-
- For example, `output_tokens` will be non-zero, even for an empty string response
- from Claude.
- """
diff --git a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_message_param.py b/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_message_param.py
deleted file mode 100644
index f88093e2..00000000
--- a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_message_param.py
+++ /dev/null
@@ -1,33 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Union, Iterable
-from typing_extensions import Literal, Required, TypedDict
-
-from ...content_block import ContentBlock
-from .prompt_caching_beta_text_block_param import PromptCachingBetaTextBlockParam
-from .prompt_caching_beta_image_block_param import PromptCachingBetaImageBlockParam
-from .prompt_caching_beta_tool_use_block_param import PromptCachingBetaToolUseBlockParam
-from .prompt_caching_beta_tool_result_block_param import PromptCachingBetaToolResultBlockParam
-
-__all__ = ["PromptCachingBetaMessageParam"]
-
-
-class PromptCachingBetaMessageParam(TypedDict, total=False):
- content: Required[
- Union[
- str,
- Iterable[
- Union[
- PromptCachingBetaTextBlockParam,
- PromptCachingBetaImageBlockParam,
- PromptCachingBetaToolUseBlockParam,
- PromptCachingBetaToolResultBlockParam,
- ContentBlock,
- ]
- ],
- ]
- ]
-
- role: Required[Literal["user", "assistant"]]
diff --git a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_text_block_param.py b/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_text_block_param.py
deleted file mode 100644
index cbb463d2..00000000
--- a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_text_block_param.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Optional
-from typing_extensions import Literal, Required, TypedDict
-
-from .prompt_caching_beta_cache_control_ephemeral_param import PromptCachingBetaCacheControlEphemeralParam
-
-__all__ = ["PromptCachingBetaTextBlockParam"]
-
-
-class PromptCachingBetaTextBlockParam(TypedDict, total=False):
- text: Required[str]
-
- type: Required[Literal["text"]]
-
- cache_control: Optional[PromptCachingBetaCacheControlEphemeralParam]
diff --git a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_tool_param.py b/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_tool_param.py
deleted file mode 100644
index cfd9f8aa..00000000
--- a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_tool_param.py
+++ /dev/null
@@ -1,45 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Dict, Union, Optional
-from typing_extensions import Literal, Required, TypeAlias, TypedDict
-
-from .prompt_caching_beta_cache_control_ephemeral_param import PromptCachingBetaCacheControlEphemeralParam
-
-__all__ = ["PromptCachingBetaToolParam", "InputSchema"]
-
-
-class InputSchemaTyped(TypedDict, total=False):
- type: Required[Literal["object"]]
-
- properties: Optional[object]
-
-
-InputSchema: TypeAlias = Union[InputSchemaTyped, Dict[str, object]]
-
-
-class PromptCachingBetaToolParam(TypedDict, total=False):
- input_schema: Required[InputSchema]
- """[JSON schema](https://json-schema.org/) for this tool's input.
-
- This defines the shape of the `input` that your tool accepts and that the model
- will produce.
- """
-
- name: Required[str]
- """Name of the tool.
-
- This is how the tool will be called by the model and in tool_use blocks.
- """
-
- cache_control: Optional[PromptCachingBetaCacheControlEphemeralParam]
-
- description: str
- """Description of what this tool does.
-
- Tool descriptions should be as detailed as possible. The more information that
- the model has about what the tool is and how to use it, the better it will
- perform. You can use natural language descriptions to reinforce important
- aspects of the tool input JSON schema.
- """
diff --git a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_tool_result_block_param.py b/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_tool_result_block_param.py
deleted file mode 100644
index 6c1ca718..00000000
--- a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_tool_result_block_param.py
+++ /dev/null
@@ -1,26 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Union, Iterable, Optional
-from typing_extensions import Literal, Required, TypeAlias, TypedDict
-
-from .prompt_caching_beta_text_block_param import PromptCachingBetaTextBlockParam
-from .prompt_caching_beta_image_block_param import PromptCachingBetaImageBlockParam
-from .prompt_caching_beta_cache_control_ephemeral_param import PromptCachingBetaCacheControlEphemeralParam
-
-__all__ = ["PromptCachingBetaToolResultBlockParam", "Content"]
-
-Content: TypeAlias = Union[PromptCachingBetaTextBlockParam, PromptCachingBetaImageBlockParam]
-
-
-class PromptCachingBetaToolResultBlockParam(TypedDict, total=False):
- tool_use_id: Required[str]
-
- type: Required[Literal["tool_result"]]
-
- cache_control: Optional[PromptCachingBetaCacheControlEphemeralParam]
-
- content: Union[str, Iterable[Content]]
-
- is_error: bool
diff --git a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_tool_use_block_param.py b/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_tool_use_block_param.py
deleted file mode 100644
index 35ccf446..00000000
--- a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_tool_use_block_param.py
+++ /dev/null
@@ -1,22 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Optional
-from typing_extensions import Literal, Required, TypedDict
-
-from .prompt_caching_beta_cache_control_ephemeral_param import PromptCachingBetaCacheControlEphemeralParam
-
-__all__ = ["PromptCachingBetaToolUseBlockParam"]
-
-
-class PromptCachingBetaToolUseBlockParam(TypedDict, total=False):
- id: Required[str]
-
- input: Required[object]
-
- name: Required[str]
-
- type: Required[Literal["tool_use"]]
-
- cache_control: Optional[PromptCachingBetaCacheControlEphemeralParam]
diff --git a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_usage.py b/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_usage.py
deleted file mode 100644
index 20d23004..00000000
--- a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_usage.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Optional
-
-from ...._models import BaseModel
-
-__all__ = ["PromptCachingBetaUsage"]
-
-
-class PromptCachingBetaUsage(BaseModel):
- cache_creation_input_tokens: Optional[int] = None
- """The number of input tokens used to create the cache entry."""
-
- cache_read_input_tokens: Optional[int] = None
- """The number of input tokens read from the cache."""
-
- input_tokens: int
- """The number of input tokens which were used."""
-
- output_tokens: int
- """The number of output tokens which were used."""
diff --git a/src/anthropic/types/beta/prompt_caching/raw_prompt_caching_beta_message_start_event.py b/src/anthropic/types/beta/prompt_caching/raw_prompt_caching_beta_message_start_event.py
deleted file mode 100644
index 9d055e22..00000000
--- a/src/anthropic/types/beta/prompt_caching/raw_prompt_caching_beta_message_start_event.py
+++ /dev/null
@@ -1,14 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing_extensions import Literal
-
-from ...._models import BaseModel
-from .prompt_caching_beta_message import PromptCachingBetaMessage
-
-__all__ = ["RawPromptCachingBetaMessageStartEvent"]
-
-
-class RawPromptCachingBetaMessageStartEvent(BaseModel):
- message: PromptCachingBetaMessage
-
- type: Literal["message_start"]
diff --git a/src/anthropic/types/beta/prompt_caching/raw_prompt_caching_beta_message_stream_event.py b/src/anthropic/types/beta/prompt_caching/raw_prompt_caching_beta_message_stream_event.py
deleted file mode 100644
index 58099baf..00000000
--- a/src/anthropic/types/beta/prompt_caching/raw_prompt_caching_beta_message_stream_event.py
+++ /dev/null
@@ -1,26 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Union
-from typing_extensions import Annotated, TypeAlias
-
-from ...._utils import PropertyInfo
-from ...raw_message_stop_event import RawMessageStopEvent
-from ...raw_message_delta_event import RawMessageDeltaEvent
-from ...raw_content_block_stop_event import RawContentBlockStopEvent
-from ...raw_content_block_delta_event import RawContentBlockDeltaEvent
-from ...raw_content_block_start_event import RawContentBlockStartEvent
-from .raw_prompt_caching_beta_message_start_event import RawPromptCachingBetaMessageStartEvent
-
-__all__ = ["RawPromptCachingBetaMessageStreamEvent"]
-
-RawPromptCachingBetaMessageStreamEvent: TypeAlias = Annotated[
- Union[
- RawPromptCachingBetaMessageStartEvent,
- RawMessageDeltaEvent,
- RawMessageStopEvent,
- RawContentBlockStartEvent,
- RawContentBlockDeltaEvent,
- RawContentBlockStopEvent,
- ],
- PropertyInfo(discriminator="type"),
-]
diff --git a/src/anthropic/types/beta_billing_error.py b/src/anthropic/types/beta_billing_error.py
new file mode 100644
index 00000000..1ab37614
--- /dev/null
+++ b/src/anthropic/types/beta_billing_error.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from .._models import BaseModel
+
+__all__ = ["BetaBillingError"]
+
+
+class BetaBillingError(BaseModel):
+ message: str
+
+ type: Literal["billing_error"]
diff --git a/src/anthropic/types/beta_error.py b/src/anthropic/types/beta_error.py
index 4d870ff4..029d80dc 100644
--- a/src/anthropic/types/beta_error.py
+++ b/src/anthropic/types/beta_error.py
@@ -5,11 +5,13 @@
from .._utils import PropertyInfo
from .beta_api_error import BetaAPIError
+from .beta_billing_error import BetaBillingError
from .beta_not_found_error import BetaNotFoundError
from .beta_overloaded_error import BetaOverloadedError
from .beta_permission_error import BetaPermissionError
from .beta_rate_limit_error import BetaRateLimitError
from .beta_authentication_error import BetaAuthenticationError
+from .beta_gateway_timeout_error import BetaGatewayTimeoutError
from .beta_invalid_request_error import BetaInvalidRequestError
__all__ = ["BetaError"]
@@ -18,9 +20,11 @@
Union[
BetaInvalidRequestError,
BetaAuthenticationError,
+ BetaBillingError,
BetaPermissionError,
BetaNotFoundError,
BetaRateLimitError,
+ BetaGatewayTimeoutError,
BetaAPIError,
BetaOverloadedError,
],
diff --git a/src/anthropic/types/beta_gateway_timeout_error.py b/src/anthropic/types/beta_gateway_timeout_error.py
new file mode 100644
index 00000000..9a29705b
--- /dev/null
+++ b/src/anthropic/types/beta_gateway_timeout_error.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from .._models import BaseModel
+
+__all__ = ["BetaGatewayTimeoutError"]
+
+
+class BetaGatewayTimeoutError(BaseModel):
+ message: str
+
+ type: Literal["timeout_error"]
diff --git a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_cache_control_ephemeral_param.py b/src/anthropic/types/cache_control_ephemeral_param.py
similarity index 62%
rename from src/anthropic/types/beta/prompt_caching/prompt_caching_beta_cache_control_ephemeral_param.py
rename to src/anthropic/types/cache_control_ephemeral_param.py
index 8370b938..8900071e 100644
--- a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_cache_control_ephemeral_param.py
+++ b/src/anthropic/types/cache_control_ephemeral_param.py
@@ -4,8 +4,8 @@
from typing_extensions import Literal, Required, TypedDict
-__all__ = ["PromptCachingBetaCacheControlEphemeralParam"]
+__all__ = ["CacheControlEphemeralParam"]
-class PromptCachingBetaCacheControlEphemeralParam(TypedDict, total=False):
+class CacheControlEphemeralParam(TypedDict, total=False):
type: Required[Literal["ephemeral"]]
diff --git a/src/anthropic/types/content_block_param.py b/src/anthropic/types/content_block_param.py
index 65e9bd4a..836a5e19 100644
--- a/src/anthropic/types/content_block_param.py
+++ b/src/anthropic/types/content_block_param.py
@@ -7,9 +7,12 @@
from .text_block_param import TextBlockParam
from .image_block_param import ImageBlockParam
+from .document_block_param import DocumentBlockParam
from .tool_use_block_param import ToolUseBlockParam
from .tool_result_block_param import ToolResultBlockParam
__all__ = ["ContentBlockParam"]
-ContentBlockParam: TypeAlias = Union[TextBlockParam, ImageBlockParam, ToolUseBlockParam, ToolResultBlockParam]
+ContentBlockParam: TypeAlias = Union[
+ TextBlockParam, ImageBlockParam, ToolUseBlockParam, ToolResultBlockParam, DocumentBlockParam
+]
diff --git a/src/anthropic/types/document_block_param.py b/src/anthropic/types/document_block_param.py
new file mode 100644
index 00000000..57522e93
--- /dev/null
+++ b/src/anthropic/types/document_block_param.py
@@ -0,0 +1,19 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import Literal, Required, TypedDict
+
+from .base64_pdf_source_param import Base64PDFSourceParam
+from .cache_control_ephemeral_param import CacheControlEphemeralParam
+
+__all__ = ["DocumentBlockParam"]
+
+
+class DocumentBlockParam(TypedDict, total=False):
+ source: Required[Base64PDFSourceParam]
+
+ type: Required[Literal["document"]]
+
+ cache_control: Optional[CacheControlEphemeralParam]
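`DocumentBlockParam` makes base64-encoded PDFs a first-class content block alongside text and images. A hedged sketch of attaching a PDF to a user turn, assuming the Messages endpoint accepts the `document` block as typed here and that `Base64PDFSourceParam` takes `type`, `media_type`, and `data` fields; the file path is illustrative:

```python
import base64
from pathlib import Path

from anthropic import Anthropic

client = Anthropic()

# Encode a local PDF as base64 (path is illustrative).
pdf_data = base64.standard_b64encode(Path("report.pdf").read_bytes()).decode()

message = client.messages.create(
    model="claude-3-5-sonnet-20241022",
    max_tokens=1024,
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "document",
                    "source": {
                        "type": "base64",
                        "media_type": "application/pdf",
                        "data": pdf_data,
                    },
                },
                {"type": "text", "text": "Summarize this document."},
            ],
        }
    ],
)
print(message.content)
```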
diff --git a/src/anthropic/types/image_block_param.py b/src/anthropic/types/image_block_param.py
index d7f46fa9..bfd8c18e 100644
--- a/src/anthropic/types/image_block_param.py
+++ b/src/anthropic/types/image_block_param.py
@@ -2,12 +2,13 @@
from __future__ import annotations
-from typing import Union
+from typing import Union, Optional
from typing_extensions import Literal, Required, Annotated, TypedDict
from .._types import Base64FileInput
from .._utils import PropertyInfo
from .._models import set_pydantic_config
+from .cache_control_ephemeral_param import CacheControlEphemeralParam
__all__ = ["ImageBlockParam", "Source"]
@@ -27,3 +28,5 @@ class ImageBlockParam(TypedDict, total=False):
source: Required[Source]
type: Required[Literal["image"]]
+
+ cache_control: Optional[CacheControlEphemeralParam]
diff --git a/src/anthropic/types/beta/prompt_caching/message_create_params.py b/src/anthropic/types/message_count_tokens_params.py
similarity index 55%
rename from src/anthropic/types/beta/prompt_caching/message_create_params.py
rename to src/anthropic/types/message_count_tokens_params.py
index c95a0bbb..c3afbf36 100644
--- a/src/anthropic/types/beta/prompt_caching/message_create_params.py
+++ b/src/anthropic/types/message_count_tokens_params.py
@@ -2,43 +2,20 @@
from __future__ import annotations
-from typing import List, Union, Iterable
-from typing_extensions import Literal, Required, TypeAlias, TypedDict
-
-from ...model_param import ModelParam
-from ...metadata_param import MetadataParam
-from ...tool_choice_param import ToolChoiceParam
-from ...tool_choice_any_param import ToolChoiceAnyParam
-from ...tool_choice_auto_param import ToolChoiceAutoParam
-from ...tool_choice_tool_param import ToolChoiceToolParam
-from .prompt_caching_beta_tool_param import PromptCachingBetaToolParam
-from .prompt_caching_beta_message_param import PromptCachingBetaMessageParam
-from .prompt_caching_beta_text_block_param import PromptCachingBetaTextBlockParam
-
-__all__ = [
- "MessageCreateParamsBase",
- "Metadata",
- "ToolChoice",
- "ToolChoiceToolChoiceAuto",
- "ToolChoiceToolChoiceAny",
- "ToolChoiceToolChoiceTool",
- "MessageCreateParamsNonStreaming",
- "MessageCreateParamsStreaming",
-]
-
-
-class MessageCreateParamsBase(TypedDict, total=False):
- max_tokens: Required[int]
- """The maximum number of tokens to generate before stopping.
-
- Note that our models may stop _before_ reaching this maximum. This parameter
- only specifies the absolute maximum number of tokens to generate.
-
- Different models have different maximum values for this parameter. See
- [models](https://docs.anthropic.com/en/docs/models-overview) for details.
- """
+from typing import Union, Iterable
+from typing_extensions import Required, TypedDict
+
+from .tool_param import ToolParam
+from .model_param import ModelParam
+from .message_param import MessageParam
+from .text_block_param import TextBlockParam
+from .tool_choice_param import ToolChoiceParam
+
+__all__ = ["MessageCountTokensParams"]
- messages: Required[Iterable[PromptCachingBetaMessageParam]]
+
+class MessageCountTokensParams(TypedDict, total=False):
+ messages: Required[Iterable[MessageParam]]
"""Input messages.
Our models are trained to operate on alternating `user` and `assistant`
@@ -134,22 +111,7 @@ class MessageCreateParamsBase(TypedDict, total=False):
details and options.
"""
- metadata: MetadataParam
- """An object describing metadata about the request."""
-
- stop_sequences: List[str]
- """Custom text sequences that will cause the model to stop generating.
-
- Our models will normally stop when they have naturally completed their turn,
- which will result in a response `stop_reason` of `"end_turn"`.
-
- If you want the model to stop generating when it encounters custom strings of
- text, you can use the `stop_sequences` parameter. If the model encounters one of
- the custom sequences, the response `stop_reason` value will be `"stop_sequence"`
- and the response `stop_sequence` value will contain the matched stop sequence.
- """
-
- system: Union[str, Iterable[PromptCachingBetaTextBlockParam]]
+ system: Union[str, Iterable[TextBlockParam]]
"""System prompt.
A system prompt is a way of providing context and instructions to Claude, such
@@ -157,24 +119,13 @@ class MessageCreateParamsBase(TypedDict, total=False):
[guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts).
"""
- temperature: float
- """Amount of randomness injected into the response.
-
- Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
- for analytical / multiple choice, and closer to `1.0` for creative and
- generative tasks.
-
- Note that even with `temperature` of `0.0`, the results will not be fully
- deterministic.
- """
-
tool_choice: ToolChoiceParam
"""How the model should use the provided tools.
The model can use a specific tool, any available tool, or decide by itself.
"""
- tools: Iterable[PromptCachingBetaToolParam]
+ tools: Iterable[ToolParam]
"""Definitions of tools that the model may use.
If you include `tools` in your API request, the model may return `tool_use`
@@ -244,62 +195,3 @@ class MessageCreateParamsBase(TypedDict, total=False):
See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details.
"""
-
- top_k: int
- """Only sample from the top K options for each subsequent token.
-
- Used to remove "long tail" low probability responses.
- [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
-
- Recommended for advanced use cases only. You usually only need to use
- `temperature`.
- """
-
- top_p: float
- """Use nucleus sampling.
-
- In nucleus sampling, we compute the cumulative distribution over all the options
- for each subsequent token in decreasing probability order and cut it off once it
- reaches a particular probability specified by `top_p`. You should either alter
- `temperature` or `top_p`, but not both.
-
- Recommended for advanced use cases only. You usually only need to use
- `temperature`.
- """
-
-
-Metadata: TypeAlias = MetadataParam
-"""This is deprecated, `MetadataParam` should be used instead"""
-
-ToolChoice: TypeAlias = ToolChoiceParam
-"""This is deprecated, `ToolChoiceParam` should be used instead"""
-
-ToolChoiceToolChoiceAuto: TypeAlias = ToolChoiceAutoParam
-"""This is deprecated, `ToolChoiceAutoParam` should be used instead"""
-
-ToolChoiceToolChoiceAny: TypeAlias = ToolChoiceAnyParam
-"""This is deprecated, `ToolChoiceAnyParam` should be used instead"""
-
-ToolChoiceToolChoiceTool: TypeAlias = ToolChoiceToolParam
-"""This is deprecated, `ToolChoiceToolParam` should be used instead"""
-
-
-class MessageCreateParamsNonStreaming(MessageCreateParamsBase, total=False):
- stream: Literal[False]
- """Whether to incrementally stream the response using server-sent events.
-
- See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for
- details.
- """
-
-
-class MessageCreateParamsStreaming(MessageCreateParamsBase):
- stream: Required[Literal[True]]
- """Whether to incrementally stream the response using server-sent events.
-
- See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for
- details.
- """
-
-
-MessageCreateParams = Union[MessageCreateParamsNonStreaming, MessageCreateParamsStreaming]
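The prompt-caching `message_create_params` module is repurposed here as `MessageCountTokensParams`, the request shape for the new token-counting endpoint. A minimal sketch, assuming `client.messages.count_tokens()` takes these params and returns a `MessageTokensCount`:

```python
from anthropic import Anthropic

client = Anthropic()

# Token counting accepts the same inputs a Messages request would:
# messages and model are required; system, tools, and tool_choice are optional.
count = client.messages.count_tokens(
    model="claude-3-5-sonnet-20241022",
    system="You are a terse assistant.",
    messages=[{"role": "user", "content": "Hello, world"}],
)
print(count.input_tokens)  # total across messages, system prompt, and tools
```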
diff --git a/src/anthropic/types/message_param.py b/src/anthropic/types/message_param.py
index 89921c61..811fc7b5 100644
--- a/src/anthropic/types/message_param.py
+++ b/src/anthropic/types/message_param.py
@@ -8,6 +8,7 @@
from .content_block import ContentBlock
from .text_block_param import TextBlockParam
from .image_block_param import ImageBlockParam
+from .document_block_param import DocumentBlockParam
from .tool_use_block_param import ToolUseBlockParam
from .tool_result_block_param import ToolResultBlockParam
@@ -17,7 +18,17 @@
class MessageParam(TypedDict, total=False):
content: Required[
Union[
- str, Iterable[Union[TextBlockParam, ImageBlockParam, ToolUseBlockParam, ToolResultBlockParam, ContentBlock]]
+ str,
+ Iterable[
+ Union[
+ TextBlockParam,
+ ImageBlockParam,
+ ToolUseBlockParam,
+ ToolResultBlockParam,
+ DocumentBlockParam,
+ ContentBlock,
+ ]
+ ],
]
]
diff --git a/src/anthropic/types/message_tokens_count.py b/src/anthropic/types/message_tokens_count.py
new file mode 100644
index 00000000..d570019f
--- /dev/null
+++ b/src/anthropic/types/message_tokens_count.py
@@ -0,0 +1,14 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+
+from .._models import BaseModel
+
+__all__ = ["MessageTokensCount"]
+
+
+class MessageTokensCount(BaseModel):
+ input_tokens: int
+ """
+ The total number of tokens across the provided list of messages, system prompt,
+ and tools.
+ """
diff --git a/src/anthropic/types/messages/__init__.py b/src/anthropic/types/messages/__init__.py
new file mode 100644
index 00000000..c316f0ec
--- /dev/null
+++ b/src/anthropic/types/messages/__init__.py
@@ -0,0 +1,14 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from .message_batch import MessageBatch as MessageBatch
+from .batch_list_params import BatchListParams as BatchListParams
+from .batch_create_params import BatchCreateParams as BatchCreateParams
+from .message_batch_result import MessageBatchResult as MessageBatchResult
+from .message_batch_errored_result import MessageBatchErroredResult as MessageBatchErroredResult
+from .message_batch_expired_result import MessageBatchExpiredResult as MessageBatchExpiredResult
+from .message_batch_request_counts import MessageBatchRequestCounts as MessageBatchRequestCounts
+from .message_batch_canceled_result import MessageBatchCanceledResult as MessageBatchCanceledResult
+from .message_batch_succeeded_result import MessageBatchSucceededResult as MessageBatchSucceededResult
+from .message_batch_individual_response import MessageBatchIndividualResponse as MessageBatchIndividualResponse
diff --git a/src/anthropic/types/messages/batch_create_params.py b/src/anthropic/types/messages/batch_create_params.py
new file mode 100644
index 00000000..a82a5ff0
--- /dev/null
+++ b/src/anthropic/types/messages/batch_create_params.py
@@ -0,0 +1,36 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Iterable
+from typing_extensions import Required, TypedDict
+
+from ..message_create_params import MessageCreateParamsNonStreaming
+
+__all__ = ["BatchCreateParams", "Request"]
+
+
+class BatchCreateParams(TypedDict, total=False):
+ requests: Required[Iterable[Request]]
+ """List of requests for prompt completion.
+
+ Each is an individual request to create a Message.
+ """
+
+
+class Request(TypedDict, total=False):
+ custom_id: Required[str]
+ """Developer-provided ID created for each request in a Message Batch.
+
+ Useful for matching results to requests, as results may be given out of request
+ order.
+
+ Must be unique for each request within the Message Batch.
+ """
+
+ params: Required[MessageCreateParamsNonStreaming]
+ """Messages API creation parameters for the individual request.
+
+ See the [Messages API reference](/en/api/messages) for full documentation on
+ available parameters.
+ """
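`BatchCreateParams` is a list of `Request` items, each pairing a unique `custom_id` with non-streaming Messages creation params. A sketch of submitting a small batch, assuming `client.messages.batches.create()` accepts these params; the custom IDs are illustrative:

```python
from anthropic import Anthropic

client = Anthropic()

batch = client.messages.batches.create(
    requests=[
        {
            "custom_id": "request-1",  # must be unique within the batch
            "params": {
                "model": "claude-3-5-sonnet-20241022",
                "max_tokens": 1024,
                "messages": [{"role": "user", "content": "Hello, world"}],
            },
        },
        {
            "custom_id": "request-2",
            "params": {
                "model": "claude-3-5-sonnet-20241022",
                "max_tokens": 1024,
                "messages": [{"role": "user", "content": "Ping"}],
            },
        },
    ],
)
print(batch.id, batch.processing_status)
```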
diff --git a/src/anthropic/types/messages/batch_list_params.py b/src/anthropic/types/messages/batch_list_params.py
new file mode 100644
index 00000000..7b290a77
--- /dev/null
+++ b/src/anthropic/types/messages/batch_list_params.py
@@ -0,0 +1,27 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import TypedDict
+
+__all__ = ["BatchListParams"]
+
+
+class BatchListParams(TypedDict, total=False):
+ after_id: str
+ """ID of the object to use as a cursor for pagination.
+
+ When provided, returns the page of results immediately after this object.
+ """
+
+ before_id: str
+ """ID of the object to use as a cursor for pagination.
+
+ When provided, returns the page of results immediately before this object.
+ """
+
+ limit: int
+ """Number of items to return per page.
+
+ Defaults to `20`. Ranges from `1` to `1000`.
+ """
diff --git a/src/anthropic/types/messages/message_batch.py b/src/anthropic/types/messages/message_batch.py
new file mode 100644
index 00000000..a03e73e1
--- /dev/null
+++ b/src/anthropic/types/messages/message_batch.py
@@ -0,0 +1,77 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from datetime import datetime
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+from .message_batch_request_counts import MessageBatchRequestCounts
+
+__all__ = ["MessageBatch"]
+
+
+class MessageBatch(BaseModel):
+ id: str
+ """Unique object identifier.
+
+ The format and length of IDs may change over time.
+ """
+
+ archived_at: Optional[datetime] = None
+ """
+ RFC 3339 datetime string representing the time at which the Message Batch was
+ archived and its results became unavailable.
+ """
+
+ cancel_initiated_at: Optional[datetime] = None
+ """
+ RFC 3339 datetime string representing the time at which cancellation was
+ initiated for the Message Batch. Specified only if cancellation was initiated.
+ """
+
+ created_at: datetime
+ """
+ RFC 3339 datetime string representing the time at which the Message Batch was
+ created.
+ """
+
+ ended_at: Optional[datetime] = None
+ """
+ RFC 3339 datetime string representing the time at which processing for the
+ Message Batch ended. Specified only once processing ends.
+
+ Processing ends when every request in a Message Batch has either succeeded,
+ errored, canceled, or expired.
+ """
+
+ expires_at: datetime
+ """
+ RFC 3339 datetime string representing the time at which the Message Batch will
+ expire and end processing, which is 24 hours after creation.
+ """
+
+ processing_status: Literal["in_progress", "canceling", "ended"]
+ """Processing status of the Message Batch."""
+
+ request_counts: MessageBatchRequestCounts
+ """Tallies requests within the Message Batch, categorized by their status.
+
+ Requests start as `processing` and move to one of the other statuses only once
+ processing of the entire batch ends. The sum of all values always matches the
+ total number of requests in the batch.
+ """
+
+ results_url: Optional[str] = None
+ """URL to a `.jsonl` file containing the results of the Message Batch requests.
+
+ Specified only once processing ends.
+
+ Results in the file are not guaranteed to be in the same order as requests. Use
+ the `custom_id` field to match results to requests.
+ """
+
+ type: Literal["message_batch"]
+ """Object type.
+
+ For Message Batches, this is always `"message_batch"`.
+ """
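`MessageBatch` exposes the `processing_status` and lifecycle timestamps needed to poll a batch to completion. A hedged polling sketch; the poll interval is arbitrary and the batch ID is a placeholder:

```python
import time

from anthropic import Anthropic
from anthropic.types.messages import MessageBatch

client = Anthropic()


def wait_for_batch(batch_id: str, poll_seconds: float = 30.0) -> MessageBatch:
    """Poll a Message Batch until processing has ended, then return it."""
    while True:
        batch = client.messages.batches.retrieve(batch_id)
        if batch.processing_status == "ended":
            return batch
        # Still "in_progress" or "canceling"; wait and re-check.
        time.sleep(poll_seconds)


finished = wait_for_batch("msgbatch_...")  # placeholder; use the ID from batches.create
print(finished.request_counts)
print(finished.results_url)  # populated only once processing ends
```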
diff --git a/src/anthropic/types/messages/message_batch_canceled_result.py b/src/anthropic/types/messages/message_batch_canceled_result.py
new file mode 100644
index 00000000..9826aa91
--- /dev/null
+++ b/src/anthropic/types/messages/message_batch_canceled_result.py
@@ -0,0 +1,11 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["MessageBatchCanceledResult"]
+
+
+class MessageBatchCanceledResult(BaseModel):
+ type: Literal["canceled"]
diff --git a/src/anthropic/types/messages/message_batch_errored_result.py b/src/anthropic/types/messages/message_batch_errored_result.py
new file mode 100644
index 00000000..5f890bfd
--- /dev/null
+++ b/src/anthropic/types/messages/message_batch_errored_result.py
@@ -0,0 +1,14 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+from ..shared.error_response import ErrorResponse
+
+__all__ = ["MessageBatchErroredResult"]
+
+
+class MessageBatchErroredResult(BaseModel):
+ error: ErrorResponse
+
+ type: Literal["errored"]
diff --git a/src/anthropic/types/messages/message_batch_expired_result.py b/src/anthropic/types/messages/message_batch_expired_result.py
new file mode 100644
index 00000000..ab9964e7
--- /dev/null
+++ b/src/anthropic/types/messages/message_batch_expired_result.py
@@ -0,0 +1,11 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["MessageBatchExpiredResult"]
+
+
+class MessageBatchExpiredResult(BaseModel):
+ type: Literal["expired"]
diff --git a/src/anthropic/types/messages/message_batch_individual_response.py b/src/anthropic/types/messages/message_batch_individual_response.py
new file mode 100644
index 00000000..19d4f090
--- /dev/null
+++ b/src/anthropic/types/messages/message_batch_individual_response.py
@@ -0,0 +1,26 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+
+from ..._models import BaseModel
+from .message_batch_result import MessageBatchResult
+
+__all__ = ["MessageBatchIndividualResponse"]
+
+
+class MessageBatchIndividualResponse(BaseModel):
+ custom_id: str
+ """Developer-provided ID created for each request in a Message Batch.
+
+ Useful for matching results to requests, as results may be given out of request
+ order.
+
+ Must be unique for each request within the Message Batch.
+ """
+
+ result: MessageBatchResult
+ """Processing result for this request.
+
+ Contains a Message output if processing was successful, an error response if
+ processing failed, or the reason why processing was not attempted, such as
+ cancellation or expiration.
+ """
diff --git a/src/anthropic/types/messages/message_batch_request_counts.py b/src/anthropic/types/messages/message_batch_request_counts.py
new file mode 100644
index 00000000..04edc3c3
--- /dev/null
+++ b/src/anthropic/types/messages/message_batch_request_counts.py
@@ -0,0 +1,35 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+
+from ..._models import BaseModel
+
+__all__ = ["MessageBatchRequestCounts"]
+
+
+class MessageBatchRequestCounts(BaseModel):
+ canceled: int
+ """Number of requests in the Message Batch that have been canceled.
+
+ This is zero until processing of the entire Message Batch has ended.
+ """
+
+ errored: int
+ """Number of requests in the Message Batch that encountered an error.
+
+ This is zero until processing of the entire Message Batch has ended.
+ """
+
+ expired: int
+ """Number of requests in the Message Batch that have expired.
+
+ This is zero until processing of the entire Message Batch has ended.
+ """
+
+ processing: int
+ """Number of requests in the Message Batch that are processing."""
+
+ succeeded: int
+ """Number of requests in the Message Batch that have completed successfully.
+
+ This is zero until processing of the entire Message Batch has ended.
+ """
diff --git a/src/anthropic/types/messages/message_batch_result.py b/src/anthropic/types/messages/message_batch_result.py
new file mode 100644
index 00000000..3186f2aa
--- /dev/null
+++ b/src/anthropic/types/messages/message_batch_result.py
@@ -0,0 +1,19 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union
+from typing_extensions import Annotated, TypeAlias
+
+from ..._utils import PropertyInfo
+from .message_batch_errored_result import MessageBatchErroredResult
+from .message_batch_expired_result import MessageBatchExpiredResult
+from .message_batch_canceled_result import MessageBatchCanceledResult
+from .message_batch_succeeded_result import MessageBatchSucceededResult
+
+__all__ = ["MessageBatchResult"]
+
+MessageBatchResult: TypeAlias = Annotated[
+ Union[
+ MessageBatchSucceededResult, MessageBatchErroredResult, MessageBatchCanceledResult, MessageBatchExpiredResult
+ ],
+ PropertyInfo(discriminator="type"),
+]
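`MessageBatchResult` is a discriminated union over the four per-request outcomes, so consumers can branch on `type` when reading the `.jsonl` results file. A sketch that parses the raw results line by line, assuming the `BinaryAPIResponse` returned by `client.messages.batches.results()` exposes the body via `.read()`; the batch ID is a placeholder:

```python
import json

from anthropic import Anthropic

client = Anthropic()

response = client.messages.batches.results("msgbatch_...")  # placeholder ID
for line in response.read().decode("utf-8").splitlines():
    if not line.strip():
        continue
    entry = json.loads(line)  # shape of MessageBatchIndividualResponse
    result = entry["result"]
    if result["type"] == "succeeded":
        print(entry["custom_id"], result["message"]["content"])
    elif result["type"] == "errored":
        # result["error"] is an ErrorResponse wrapping the shared error union.
        print(entry["custom_id"], "error:", result["error"]["error"]["type"])
    else:  # "canceled" or "expired"
        print(entry["custom_id"], result["type"])
```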
diff --git a/src/anthropic/types/messages/message_batch_succeeded_result.py b/src/anthropic/types/messages/message_batch_succeeded_result.py
new file mode 100644
index 00000000..1cc454a4
--- /dev/null
+++ b/src/anthropic/types/messages/message_batch_succeeded_result.py
@@ -0,0 +1,14 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..message import Message
+from ..._models import BaseModel
+
+__all__ = ["MessageBatchSucceededResult"]
+
+
+class MessageBatchSucceededResult(BaseModel):
+ message: Message
+
+ type: Literal["succeeded"]
diff --git a/src/anthropic/types/model_info.py b/src/anthropic/types/model_info.py
new file mode 100644
index 00000000..0e3945fe
--- /dev/null
+++ b/src/anthropic/types/model_info.py
@@ -0,0 +1,28 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from datetime import datetime
+from typing_extensions import Literal
+
+from .._models import BaseModel
+
+__all__ = ["ModelInfo"]
+
+
+class ModelInfo(BaseModel):
+ id: str
+ """Unique model identifier."""
+
+ created_at: datetime
+ """RFC 3339 datetime string representing the time at which the model was released.
+
+ May be set to an epoch value if the release date is unknown.
+ """
+
+ display_name: str
+ """A human-readable name for the model."""
+
+ type: Literal["model"]
+ """Object type.
+
+ For Models, this is always `"model"`.
+ """
diff --git a/src/anthropic/types/model_list_params.py b/src/anthropic/types/model_list_params.py
new file mode 100644
index 00000000..b16d22a3
--- /dev/null
+++ b/src/anthropic/types/model_list_params.py
@@ -0,0 +1,27 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import TypedDict
+
+__all__ = ["ModelListParams"]
+
+
+class ModelListParams(TypedDict, total=False):
+ after_id: str
+ """ID of the object to use as a cursor for pagination.
+
+ When provided, returns the page of results immediately after this object.
+ """
+
+ before_id: str
+ """ID of the object to use as a cursor for pagination.
+
+ When provided, returns the page of results immediately before this object.
+ """
+
+ limit: int
+ """Number of items to return per page.
+
+ Defaults to `20`. Ranges from `1` to `1000`.
+ """
diff --git a/src/anthropic/types/raw_content_block_delta_event.py b/src/anthropic/types/raw_content_block_delta_event.py
index b384fbd3..8785197f 100644
--- a/src/anthropic/types/raw_content_block_delta_event.py
+++ b/src/anthropic/types/raw_content_block_delta_event.py
@@ -1,16 +1,15 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from typing import Union
-from typing_extensions import Literal, Annotated, TypeAlias
+from typing_extensions import Literal, TypeAlias
-from .._utils import PropertyInfo
from .._models import BaseModel
from .text_delta import TextDelta
from .input_json_delta import InputJSONDelta
__all__ = ["RawContentBlockDeltaEvent", "Delta"]
-Delta: TypeAlias = Annotated[Union[TextDelta, InputJSONDelta], PropertyInfo(discriminator="type")]
+Delta: TypeAlias = Union[TextDelta, InputJSONDelta]
class RawContentBlockDeltaEvent(BaseModel):
diff --git a/src/anthropic/types/shared/__init__.py b/src/anthropic/types/shared/__init__.py
new file mode 100644
index 00000000..178643b6
--- /dev/null
+++ b/src/anthropic/types/shared/__init__.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .error_object import ErrorObject as ErrorObject
+from .billing_error import BillingError as BillingError
+from .error_response import ErrorResponse as ErrorResponse
+from .not_found_error import NotFoundError as NotFoundError
+from .api_error_object import APIErrorObject as APIErrorObject
+from .overloaded_error import OverloadedError as OverloadedError
+from .permission_error import PermissionError as PermissionError
+from .rate_limit_error import RateLimitError as RateLimitError
+from .authentication_error import AuthenticationError as AuthenticationError
+from .gateway_timeout_error import GatewayTimeoutError as GatewayTimeoutError
+from .invalid_request_error import InvalidRequestError as InvalidRequestError
diff --git a/src/anthropic/types/shared/api_error_object.py b/src/anthropic/types/shared/api_error_object.py
new file mode 100644
index 00000000..dd92bead
--- /dev/null
+++ b/src/anthropic/types/shared/api_error_object.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["APIErrorObject"]
+
+
+class APIErrorObject(BaseModel):
+ message: str
+
+ type: Literal["api_error"]
diff --git a/src/anthropic/types/shared/authentication_error.py b/src/anthropic/types/shared/authentication_error.py
new file mode 100644
index 00000000..f777f5c8
--- /dev/null
+++ b/src/anthropic/types/shared/authentication_error.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["AuthenticationError"]
+
+
+class AuthenticationError(BaseModel):
+ message: str
+
+ type: Literal["authentication_error"]
diff --git a/src/anthropic/types/shared/billing_error.py b/src/anthropic/types/shared/billing_error.py
new file mode 100644
index 00000000..26be12bb
--- /dev/null
+++ b/src/anthropic/types/shared/billing_error.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["BillingError"]
+
+
+class BillingError(BaseModel):
+ message: str
+
+ type: Literal["billing_error"]
diff --git a/src/anthropic/types/shared/error_object.py b/src/anthropic/types/shared/error_object.py
new file mode 100644
index 00000000..086db503
--- /dev/null
+++ b/src/anthropic/types/shared/error_object.py
@@ -0,0 +1,32 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union
+from typing_extensions import Annotated, TypeAlias
+
+from ..._utils import PropertyInfo
+from .billing_error import BillingError
+from .not_found_error import NotFoundError
+from .api_error_object import APIErrorObject
+from .overloaded_error import OverloadedError
+from .permission_error import PermissionError
+from .rate_limit_error import RateLimitError
+from .authentication_error import AuthenticationError
+from .gateway_timeout_error import GatewayTimeoutError
+from .invalid_request_error import InvalidRequestError
+
+__all__ = ["ErrorObject"]
+
+ErrorObject: TypeAlias = Annotated[
+ Union[
+ InvalidRequestError,
+ AuthenticationError,
+ BillingError,
+ PermissionError,
+ NotFoundError,
+ RateLimitError,
+ GatewayTimeoutError,
+ APIErrorObject,
+ OverloadedError,
+ ],
+ PropertyInfo(discriminator="type"),
+]
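The shared `ErrorObject` union is discriminated on `type`, which is what lets `ErrorResponse` (and, through it, `MessageBatchErroredResult`) deserialize into the concrete error variant. A small sketch of that narrowing, assuming a Pydantic v2 environment where `model_validate` is available; the payload is a hand-written example:

```python
from anthropic.types.shared import ErrorResponse, OverloadedError

payload = {
    "type": "error",
    "error": {"type": "overloaded_error", "message": "Overloaded"},
}

# The `type` discriminator selects the concrete variant of ErrorObject.
resp = ErrorResponse.model_validate(payload)
assert isinstance(resp.error, OverloadedError)
print(resp.error.message)
```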
diff --git a/src/anthropic/types/shared/error_response.py b/src/anthropic/types/shared/error_response.py
new file mode 100644
index 00000000..97034923
--- /dev/null
+++ b/src/anthropic/types/shared/error_response.py
@@ -0,0 +1,14 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+from .error_object import ErrorObject
+
+__all__ = ["ErrorResponse"]
+
+
+class ErrorResponse(BaseModel):
+ error: ErrorObject
+
+ type: Literal["error"]
diff --git a/src/anthropic/types/shared/gateway_timeout_error.py b/src/anthropic/types/shared/gateway_timeout_error.py
new file mode 100644
index 00000000..908aa12f
--- /dev/null
+++ b/src/anthropic/types/shared/gateway_timeout_error.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["GatewayTimeoutError"]
+
+
+class GatewayTimeoutError(BaseModel):
+ message: str
+
+ type: Literal["timeout_error"]
diff --git a/src/anthropic/types/shared/invalid_request_error.py b/src/anthropic/types/shared/invalid_request_error.py
new file mode 100644
index 00000000..ee5befc0
--- /dev/null
+++ b/src/anthropic/types/shared/invalid_request_error.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["InvalidRequestError"]
+
+
+class InvalidRequestError(BaseModel):
+ message: str
+
+ type: Literal["invalid_request_error"]
diff --git a/src/anthropic/types/shared/not_found_error.py b/src/anthropic/types/shared/not_found_error.py
new file mode 100644
index 00000000..43e826fb
--- /dev/null
+++ b/src/anthropic/types/shared/not_found_error.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["NotFoundError"]
+
+
+class NotFoundError(BaseModel):
+ message: str
+
+ type: Literal["not_found_error"]
diff --git a/src/anthropic/types/shared/overloaded_error.py b/src/anthropic/types/shared/overloaded_error.py
new file mode 100644
index 00000000..74ee8373
--- /dev/null
+++ b/src/anthropic/types/shared/overloaded_error.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["OverloadedError"]
+
+
+class OverloadedError(BaseModel):
+ message: str
+
+ type: Literal["overloaded_error"]
diff --git a/src/anthropic/types/shared/permission_error.py b/src/anthropic/types/shared/permission_error.py
new file mode 100644
index 00000000..48eb3546
--- /dev/null
+++ b/src/anthropic/types/shared/permission_error.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["PermissionError"]
+
+
+class PermissionError(BaseModel):
+ message: str
+
+ type: Literal["permission_error"]
diff --git a/src/anthropic/types/shared/rate_limit_error.py b/src/anthropic/types/shared/rate_limit_error.py
new file mode 100644
index 00000000..3fa065ac
--- /dev/null
+++ b/src/anthropic/types/shared/rate_limit_error.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["RateLimitError"]
+
+
+class RateLimitError(BaseModel):
+ message: str
+
+ type: Literal["rate_limit_error"]
diff --git a/src/anthropic/types/text_block_param.py b/src/anthropic/types/text_block_param.py
index 825d1660..0b27ee2b 100644
--- a/src/anthropic/types/text_block_param.py
+++ b/src/anthropic/types/text_block_param.py
@@ -2,8 +2,11 @@
from __future__ import annotations
+from typing import Optional
from typing_extensions import Literal, Required, TypedDict
+from .cache_control_ephemeral_param import CacheControlEphemeralParam
+
__all__ = ["TextBlockParam"]
@@ -11,3 +14,5 @@ class TextBlockParam(TypedDict, total=False):
text: Required[str]
type: Required[Literal["text"]]
+
+ cache_control: Optional[CacheControlEphemeralParam]
diff --git a/src/anthropic/types/tool_param.py b/src/anthropic/types/tool_param.py
index 35a95516..3a6ab1dd 100644
--- a/src/anthropic/types/tool_param.py
+++ b/src/anthropic/types/tool_param.py
@@ -5,6 +5,8 @@
from typing import Dict, Union, Optional
from typing_extensions import Literal, Required, TypeAlias, TypedDict
+from .cache_control_ephemeral_param import CacheControlEphemeralParam
+
__all__ = ["ToolParam", "InputSchema"]
@@ -31,6 +33,8 @@ class ToolParam(TypedDict, total=False):
This is how the tool will be called by the model and in tool_use blocks.
"""
+ cache_control: Optional[CacheControlEphemeralParam]
+
description: str
"""Description of what this tool does.
diff --git a/src/anthropic/types/tool_result_block_param.py b/src/anthropic/types/tool_result_block_param.py
index 7c212e19..b6ca8aa9 100644
--- a/src/anthropic/types/tool_result_block_param.py
+++ b/src/anthropic/types/tool_result_block_param.py
@@ -2,11 +2,12 @@
from __future__ import annotations
-from typing import Union, Iterable
+from typing import Union, Iterable, Optional
from typing_extensions import Literal, Required, TypeAlias, TypedDict
from .text_block_param import TextBlockParam
from .image_block_param import ImageBlockParam
+from .cache_control_ephemeral_param import CacheControlEphemeralParam
__all__ = ["ToolResultBlockParam", "Content"]
@@ -18,6 +19,8 @@ class ToolResultBlockParam(TypedDict, total=False):
type: Required[Literal["tool_result"]]
+ cache_control: Optional[CacheControlEphemeralParam]
+
content: Union[str, Iterable[Content]]
is_error: bool
diff --git a/src/anthropic/types/tool_use_block_param.py b/src/anthropic/types/tool_use_block_param.py
index e0218476..cc285079 100644
--- a/src/anthropic/types/tool_use_block_param.py
+++ b/src/anthropic/types/tool_use_block_param.py
@@ -2,8 +2,11 @@
from __future__ import annotations
+from typing import Optional
from typing_extensions import Literal, Required, TypedDict
+from .cache_control_ephemeral_param import CacheControlEphemeralParam
+
__all__ = ["ToolUseBlockParam"]
@@ -15,3 +18,5 @@ class ToolUseBlockParam(TypedDict, total=False):
name: Required[str]
type: Required[Literal["tool_use"]]
+
+ cache_control: Optional[CacheControlEphemeralParam]
diff --git a/src/anthropic/types/usage.py b/src/anthropic/types/usage.py
index 88f1ec84..b4f817bd 100644
--- a/src/anthropic/types/usage.py
+++ b/src/anthropic/types/usage.py
@@ -1,5 +1,6 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+from typing import Optional
from .._models import BaseModel
@@ -7,6 +8,12 @@
class Usage(BaseModel):
+ cache_creation_input_tokens: Optional[int] = None
+ """The number of input tokens used to create the cache entry."""
+
+ cache_read_input_tokens: Optional[int] = None
+ """The number of input tokens read from the cache."""
+
input_tokens: int
"""The number of input tokens which were used."""
diff --git a/tests/api_resources/beta/prompt_caching/test_messages.py b/tests/api_resources/beta/prompt_caching/test_messages.py
deleted file mode 100644
index 4ecb1624..00000000
--- a/tests/api_resources/beta/prompt_caching/test_messages.py
+++ /dev/null
@@ -1,442 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-import os
-from typing import Any, cast
-
-import pytest
-
-from anthropic import Anthropic, AsyncAnthropic
-from tests.utils import assert_matches_type
-from anthropic.types.beta.prompt_caching import PromptCachingBetaMessage
-
-base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
-
-
-class TestMessages:
- parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
-
- @parametrize
- def test_method_create_overload_1(self, client: Anthropic) -> None:
- message = client.beta.prompt_caching.messages.create(
- max_tokens=1024,
- messages=[
- {
- "content": "Hello, world",
- "role": "user",
- }
- ],
- model="claude-3-5-sonnet-20241022",
- )
- assert_matches_type(PromptCachingBetaMessage, message, path=["response"])
-
- @parametrize
- def test_method_create_with_all_params_overload_1(self, client: Anthropic) -> None:
- message = client.beta.prompt_caching.messages.create(
- max_tokens=1024,
- messages=[
- {
- "content": "Hello, world",
- "role": "user",
- }
- ],
- model="claude-3-5-sonnet-20241022",
- metadata={"user_id": "13803d75-b4b5-4c3e-b2a2-6f21399b021b"},
- stop_sequences=["string"],
- stream=False,
- system=[
- {
- "text": "Today's date is 2024-06-01.",
- "type": "text",
- "cache_control": {"type": "ephemeral"},
- }
- ],
- temperature=1,
- tool_choice={
- "type": "auto",
- "disable_parallel_tool_use": True,
- },
- tools=[
- {
- "input_schema": {
- "type": "object",
- "properties": {
- "location": {
- "description": "The city and state, e.g. San Francisco, CA",
- "type": "string",
- },
- "unit": {
- "description": "Unit for the output - one of (celsius, fahrenheit)",
- "type": "string",
- },
- },
- },
- "name": "x",
- "cache_control": {"type": "ephemeral"},
- "description": "Get the current weather in a given location",
- }
- ],
- top_k=5,
- top_p=0.7,
- betas=["string"],
- )
- assert_matches_type(PromptCachingBetaMessage, message, path=["response"])
-
- @parametrize
- def test_raw_response_create_overload_1(self, client: Anthropic) -> None:
- response = client.beta.prompt_caching.messages.with_raw_response.create(
- max_tokens=1024,
- messages=[
- {
- "content": "Hello, world",
- "role": "user",
- }
- ],
- model="claude-3-5-sonnet-20241022",
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- message = response.parse()
- assert_matches_type(PromptCachingBetaMessage, message, path=["response"])
-
- @parametrize
- def test_streaming_response_create_overload_1(self, client: Anthropic) -> None:
- with client.beta.prompt_caching.messages.with_streaming_response.create(
- max_tokens=1024,
- messages=[
- {
- "content": "Hello, world",
- "role": "user",
- }
- ],
- model="claude-3-5-sonnet-20241022",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- message = response.parse()
- assert_matches_type(PromptCachingBetaMessage, message, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- def test_method_create_overload_2(self, client: Anthropic) -> None:
- message_stream = client.beta.prompt_caching.messages.create(
- max_tokens=1024,
- messages=[
- {
- "content": "Hello, world",
- "role": "user",
- }
- ],
- model="claude-3-5-sonnet-20241022",
- stream=True,
- )
- message_stream.response.close()
-
- @parametrize
- def test_method_create_with_all_params_overload_2(self, client: Anthropic) -> None:
- message_stream = client.beta.prompt_caching.messages.create(
- max_tokens=1024,
- messages=[
- {
- "content": "Hello, world",
- "role": "user",
- }
- ],
- model="claude-3-5-sonnet-20241022",
- stream=True,
- metadata={"user_id": "13803d75-b4b5-4c3e-b2a2-6f21399b021b"},
- stop_sequences=["string"],
- system=[
- {
- "text": "Today's date is 2024-06-01.",
- "type": "text",
- "cache_control": {"type": "ephemeral"},
- }
- ],
- temperature=1,
- tool_choice={
- "type": "auto",
- "disable_parallel_tool_use": True,
- },
- tools=[
- {
- "input_schema": {
- "type": "object",
- "properties": {
- "location": {
- "description": "The city and state, e.g. San Francisco, CA",
- "type": "string",
- },
- "unit": {
- "description": "Unit for the output - one of (celsius, fahrenheit)",
- "type": "string",
- },
- },
- },
- "name": "x",
- "cache_control": {"type": "ephemeral"},
- "description": "Get the current weather in a given location",
- }
- ],
- top_k=5,
- top_p=0.7,
- betas=["string"],
- )
- message_stream.response.close()
-
- @parametrize
- def test_raw_response_create_overload_2(self, client: Anthropic) -> None:
- response = client.beta.prompt_caching.messages.with_raw_response.create(
- max_tokens=1024,
- messages=[
- {
- "content": "Hello, world",
- "role": "user",
- }
- ],
- model="claude-3-5-sonnet-20241022",
- stream=True,
- )
-
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- stream = response.parse()
- stream.close()
-
- @parametrize
- def test_streaming_response_create_overload_2(self, client: Anthropic) -> None:
- with client.beta.prompt_caching.messages.with_streaming_response.create(
- max_tokens=1024,
- messages=[
- {
- "content": "Hello, world",
- "role": "user",
- }
- ],
- model="claude-3-5-sonnet-20241022",
- stream=True,
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- stream = response.parse()
- stream.close()
-
- assert cast(Any, response.is_closed) is True
-
-
-class TestAsyncMessages:
- parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"])
-
- @parametrize
- async def test_method_create_overload_1(self, async_client: AsyncAnthropic) -> None:
- message = await async_client.beta.prompt_caching.messages.create(
- max_tokens=1024,
- messages=[
- {
- "content": "Hello, world",
- "role": "user",
- }
- ],
- model="claude-3-5-sonnet-20241022",
- )
- assert_matches_type(PromptCachingBetaMessage, message, path=["response"])
-
- @parametrize
- async def test_method_create_with_all_params_overload_1(self, async_client: AsyncAnthropic) -> None:
- message = await async_client.beta.prompt_caching.messages.create(
- max_tokens=1024,
- messages=[
- {
- "content": "Hello, world",
- "role": "user",
- }
- ],
- model="claude-3-5-sonnet-20241022",
- metadata={"user_id": "13803d75-b4b5-4c3e-b2a2-6f21399b021b"},
- stop_sequences=["string"],
- stream=False,
- system=[
- {
- "text": "Today's date is 2024-06-01.",
- "type": "text",
- "cache_control": {"type": "ephemeral"},
- }
- ],
- temperature=1,
- tool_choice={
- "type": "auto",
- "disable_parallel_tool_use": True,
- },
- tools=[
- {
- "input_schema": {
- "type": "object",
- "properties": {
- "location": {
- "description": "The city and state, e.g. San Francisco, CA",
- "type": "string",
- },
- "unit": {
- "description": "Unit for the output - one of (celsius, fahrenheit)",
- "type": "string",
- },
- },
- },
- "name": "x",
- "cache_control": {"type": "ephemeral"},
- "description": "Get the current weather in a given location",
- }
- ],
- top_k=5,
- top_p=0.7,
- betas=["string"],
- )
- assert_matches_type(PromptCachingBetaMessage, message, path=["response"])
-
- @parametrize
- async def test_raw_response_create_overload_1(self, async_client: AsyncAnthropic) -> None:
- response = await async_client.beta.prompt_caching.messages.with_raw_response.create(
- max_tokens=1024,
- messages=[
- {
- "content": "Hello, world",
- "role": "user",
- }
- ],
- model="claude-3-5-sonnet-20241022",
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- message = response.parse()
- assert_matches_type(PromptCachingBetaMessage, message, path=["response"])
-
- @parametrize
- async def test_streaming_response_create_overload_1(self, async_client: AsyncAnthropic) -> None:
- async with async_client.beta.prompt_caching.messages.with_streaming_response.create(
- max_tokens=1024,
- messages=[
- {
- "content": "Hello, world",
- "role": "user",
- }
- ],
- model="claude-3-5-sonnet-20241022",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- message = await response.parse()
- assert_matches_type(PromptCachingBetaMessage, message, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- async def test_method_create_overload_2(self, async_client: AsyncAnthropic) -> None:
- message_stream = await async_client.beta.prompt_caching.messages.create(
- max_tokens=1024,
- messages=[
- {
- "content": "Hello, world",
- "role": "user",
- }
- ],
- model="claude-3-5-sonnet-20241022",
- stream=True,
- )
- await message_stream.response.aclose()
-
- @parametrize
- async def test_method_create_with_all_params_overload_2(self, async_client: AsyncAnthropic) -> None:
- message_stream = await async_client.beta.prompt_caching.messages.create(
- max_tokens=1024,
- messages=[
- {
- "content": "Hello, world",
- "role": "user",
- }
- ],
- model="claude-3-5-sonnet-20241022",
- stream=True,
- metadata={"user_id": "13803d75-b4b5-4c3e-b2a2-6f21399b021b"},
- stop_sequences=["string"],
- system=[
- {
- "text": "Today's date is 2024-06-01.",
- "type": "text",
- "cache_control": {"type": "ephemeral"},
- }
- ],
- temperature=1,
- tool_choice={
- "type": "auto",
- "disable_parallel_tool_use": True,
- },
- tools=[
- {
- "input_schema": {
- "type": "object",
- "properties": {
- "location": {
- "description": "The city and state, e.g. San Francisco, CA",
- "type": "string",
- },
- "unit": {
- "description": "Unit for the output - one of (celsius, fahrenheit)",
- "type": "string",
- },
- },
- },
- "name": "x",
- "cache_control": {"type": "ephemeral"},
- "description": "Get the current weather in a given location",
- }
- ],
- top_k=5,
- top_p=0.7,
- betas=["string"],
- )
- await message_stream.response.aclose()
-
- @parametrize
- async def test_raw_response_create_overload_2(self, async_client: AsyncAnthropic) -> None:
- response = await async_client.beta.prompt_caching.messages.with_raw_response.create(
- max_tokens=1024,
- messages=[
- {
- "content": "Hello, world",
- "role": "user",
- }
- ],
- model="claude-3-5-sonnet-20241022",
- stream=True,
- )
-
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- stream = response.parse()
- await stream.close()
-
- @parametrize
- async def test_streaming_response_create_overload_2(self, async_client: AsyncAnthropic) -> None:
- async with async_client.beta.prompt_caching.messages.with_streaming_response.create(
- max_tokens=1024,
- messages=[
- {
- "content": "Hello, world",
- "role": "user",
- }
- ],
- model="claude-3-5-sonnet-20241022",
- stream=True,
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- stream = await response.parse()
- await stream.close()
-
- assert cast(Any, response.is_closed) is True
diff --git a/tests/api_resources/beta/test_models.py b/tests/api_resources/beta/test_models.py
new file mode 100644
index 00000000..17ffd939
--- /dev/null
+++ b/tests/api_resources/beta/test_models.py
@@ -0,0 +1,167 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from anthropic import Anthropic, AsyncAnthropic
+from tests.utils import assert_matches_type
+from anthropic.pagination import SyncPage, AsyncPage
+from anthropic.types.beta import BetaModelInfo
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestModels:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ def test_method_retrieve(self, client: Anthropic) -> None:
+ model = client.beta.models.retrieve(
+ "model_id",
+ )
+ assert_matches_type(BetaModelInfo, model, path=["response"])
+
+ @parametrize
+ def test_raw_response_retrieve(self, client: Anthropic) -> None:
+ response = client.beta.models.with_raw_response.retrieve(
+ "model_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ model = response.parse()
+ assert_matches_type(BetaModelInfo, model, path=["response"])
+
+ @parametrize
+ def test_streaming_response_retrieve(self, client: Anthropic) -> None:
+ with client.beta.models.with_streaming_response.retrieve(
+ "model_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ model = response.parse()
+ assert_matches_type(BetaModelInfo, model, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_retrieve(self, client: Anthropic) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `model_id` but received ''"):
+ client.beta.models.with_raw_response.retrieve(
+ "",
+ )
+
+ @parametrize
+ def test_method_list(self, client: Anthropic) -> None:
+ model = client.beta.models.list()
+ assert_matches_type(SyncPage[BetaModelInfo], model, path=["response"])
+
+ @parametrize
+ def test_method_list_with_all_params(self, client: Anthropic) -> None:
+ model = client.beta.models.list(
+ after_id="after_id",
+ before_id="before_id",
+ limit=1,
+ )
+ assert_matches_type(SyncPage[BetaModelInfo], model, path=["response"])
+
+ @parametrize
+ def test_raw_response_list(self, client: Anthropic) -> None:
+ response = client.beta.models.with_raw_response.list()
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ model = response.parse()
+ assert_matches_type(SyncPage[BetaModelInfo], model, path=["response"])
+
+ @parametrize
+ def test_streaming_response_list(self, client: Anthropic) -> None:
+ with client.beta.models.with_streaming_response.list() as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ model = response.parse()
+ assert_matches_type(SyncPage[BetaModelInfo], model, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+
+class TestAsyncModels:
+ parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ async def test_method_retrieve(self, async_client: AsyncAnthropic) -> None:
+ model = await async_client.beta.models.retrieve(
+ "model_id",
+ )
+ assert_matches_type(BetaModelInfo, model, path=["response"])
+
+ @parametrize
+ async def test_raw_response_retrieve(self, async_client: AsyncAnthropic) -> None:
+ response = await async_client.beta.models.with_raw_response.retrieve(
+ "model_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ model = response.parse()
+ assert_matches_type(BetaModelInfo, model, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_retrieve(self, async_client: AsyncAnthropic) -> None:
+ async with async_client.beta.models.with_streaming_response.retrieve(
+ "model_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ model = await response.parse()
+ assert_matches_type(BetaModelInfo, model, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_retrieve(self, async_client: AsyncAnthropic) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `model_id` but received ''"):
+ await async_client.beta.models.with_raw_response.retrieve(
+ "",
+ )
+
+ @parametrize
+ async def test_method_list(self, async_client: AsyncAnthropic) -> None:
+ model = await async_client.beta.models.list()
+ assert_matches_type(AsyncPage[BetaModelInfo], model, path=["response"])
+
+ @parametrize
+ async def test_method_list_with_all_params(self, async_client: AsyncAnthropic) -> None:
+ model = await async_client.beta.models.list(
+ after_id="after_id",
+ before_id="before_id",
+ limit=1,
+ )
+ assert_matches_type(AsyncPage[BetaModelInfo], model, path=["response"])
+
+ @parametrize
+ async def test_raw_response_list(self, async_client: AsyncAnthropic) -> None:
+ response = await async_client.beta.models.with_raw_response.list()
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ model = response.parse()
+ assert_matches_type(AsyncPage[BetaModelInfo], model, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_list(self, async_client: AsyncAnthropic) -> None:
+ async with async_client.beta.models.with_streaming_response.list() as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ model = await response.parse()
+ assert_matches_type(AsyncPage[BetaModelInfo], model, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
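Outside of the test suite, the beta Models resource exercised above can be called as in this small sketch; the model id is an illustrative value and not part of the diff:

```python
from anthropic import Anthropic

client = Anthropic()

# Retrieve a single model by id (illustrative id).
model = client.beta.models.retrieve("claude-3-5-sonnet-20241022")
print(model)

# List models with the same pagination parameters the tests cover.
for info in client.beta.models.list(limit=20):
    print(info)
```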
diff --git a/tests/api_resources/beta/prompt_caching/__init__.py b/tests/api_resources/messages/__init__.py
similarity index 100%
rename from tests/api_resources/beta/prompt_caching/__init__.py
rename to tests/api_resources/messages/__init__.py
diff --git a/tests/api_resources/messages/test_batches.py b/tests/api_resources/messages/test_batches.py
new file mode 100644
index 00000000..770d3cb7
--- /dev/null
+++ b/tests/api_resources/messages/test_batches.py
@@ -0,0 +1,469 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+from __future__ import annotations
+
+import os
+import json
+from typing import Any, cast
+
+import httpx
+import pytest
+from respx import MockRouter
+
+from anthropic import Anthropic, AsyncAnthropic
+from tests.utils import assert_matches_type
+from anthropic.pagination import SyncPage, AsyncPage
+from anthropic.types.messages import MessageBatch
+
+# pyright: reportDeprecated=false
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestBatches:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ def test_method_create(self, client: Anthropic) -> None:
+ batch = client.messages.batches.create(
+ requests=[
+ {
+ "custom_id": "my-custom-id-1",
+ "params": {
+ "max_tokens": 1024,
+ "messages": [
+ {
+ "content": "Hello, world",
+ "role": "user",
+ }
+ ],
+ "model": "claude-3-5-sonnet-20241022",
+ },
+ }
+ ],
+ )
+ assert_matches_type(MessageBatch, batch, path=["response"])
+
+ @parametrize
+ def test_raw_response_create(self, client: Anthropic) -> None:
+ response = client.messages.batches.with_raw_response.create(
+ requests=[
+ {
+ "custom_id": "my-custom-id-1",
+ "params": {
+ "max_tokens": 1024,
+ "messages": [
+ {
+ "content": "Hello, world",
+ "role": "user",
+ }
+ ],
+ "model": "claude-3-5-sonnet-20241022",
+ },
+ }
+ ],
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ batch = response.parse()
+ assert_matches_type(MessageBatch, batch, path=["response"])
+
+ @parametrize
+ def test_streaming_response_create(self, client: Anthropic) -> None:
+ with client.messages.batches.with_streaming_response.create(
+ requests=[
+ {
+ "custom_id": "my-custom-id-1",
+ "params": {
+ "max_tokens": 1024,
+ "messages": [
+ {
+ "content": "Hello, world",
+ "role": "user",
+ }
+ ],
+ "model": "claude-3-5-sonnet-20241022",
+ },
+ }
+ ],
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ batch = response.parse()
+ assert_matches_type(MessageBatch, batch, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_method_retrieve(self, client: Anthropic) -> None:
+ batch = client.messages.batches.retrieve(
+ "message_batch_id",
+ )
+ assert_matches_type(MessageBatch, batch, path=["response"])
+
+ @parametrize
+ def test_raw_response_retrieve(self, client: Anthropic) -> None:
+ response = client.messages.batches.with_raw_response.retrieve(
+ "message_batch_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ batch = response.parse()
+ assert_matches_type(MessageBatch, batch, path=["response"])
+
+ @parametrize
+ def test_streaming_response_retrieve(self, client: Anthropic) -> None:
+ with client.messages.batches.with_streaming_response.retrieve(
+ "message_batch_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ batch = response.parse()
+ assert_matches_type(MessageBatch, batch, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_retrieve(self, client: Anthropic) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `message_batch_id` but received ''"):
+ client.messages.batches.with_raw_response.retrieve(
+ "",
+ )
+
+ @parametrize
+ def test_method_list(self, client: Anthropic) -> None:
+ batch = client.messages.batches.list()
+ assert_matches_type(SyncPage[MessageBatch], batch, path=["response"])
+
+ @parametrize
+ def test_method_list_with_all_params(self, client: Anthropic) -> None:
+ batch = client.messages.batches.list(
+ after_id="after_id",
+ before_id="before_id",
+ limit=1,
+ )
+ assert_matches_type(SyncPage[MessageBatch], batch, path=["response"])
+
+ @parametrize
+ def test_raw_response_list(self, client: Anthropic) -> None:
+ response = client.messages.batches.with_raw_response.list()
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ batch = response.parse()
+ assert_matches_type(SyncPage[MessageBatch], batch, path=["response"])
+
+ @parametrize
+ def test_streaming_response_list(self, client: Anthropic) -> None:
+ with client.messages.batches.with_streaming_response.list() as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ batch = response.parse()
+ assert_matches_type(SyncPage[MessageBatch], batch, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_method_cancel(self, client: Anthropic) -> None:
+ batch = client.messages.batches.cancel(
+ "message_batch_id",
+ )
+ assert_matches_type(MessageBatch, batch, path=["response"])
+
+ @parametrize
+ def test_raw_response_cancel(self, client: Anthropic) -> None:
+ response = client.messages.batches.with_raw_response.cancel(
+ "message_batch_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ batch = response.parse()
+ assert_matches_type(MessageBatch, batch, path=["response"])
+
+ @parametrize
+ def test_streaming_response_cancel(self, client: Anthropic) -> None:
+ with client.messages.batches.with_streaming_response.cancel(
+ "message_batch_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ batch = response.parse()
+ assert_matches_type(MessageBatch, batch, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_cancel(self, client: Anthropic) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `message_batch_id` but received ''"):
+ client.messages.batches.with_raw_response.cancel(
+ "",
+ )
+
+ @pytest.mark.respx(base_url=base_url)
+ @pytest.mark.parametrize("client", [False], indirect=True)
+ def test_method_results(self, client: Anthropic, respx_mock: MockRouter) -> None:
+ respx_mock.get("/v1/messages/batches/message_batch_id").mock(
+ return_value=httpx.Response(200, json={"results_url": "/v1/messages/batches/message_batch_id/results"})
+ )
+ respx_mock.get("/v1/messages/batches/message_batch_id/results").mock(
+ return_value=httpx.Response(
+ 200, content="\n".join([json.dumps({"foo": "bar"}), json.dumps({"bar": "baz"})])
+ )
+ )
+        results = client.messages.batches.results(
+ message_batch_id="message_batch_id",
+ )
+ assert results.http_response is not None
+ assert not results.http_response.is_stream_consumed
+
+ i = -1
+ for result in results:
+ i += 1
+ if i == 0:
+ assert result.to_dict() == {"foo": "bar"}
+ elif i == 1:
+ assert result.to_dict() == {"bar": "baz"}
+ else:
+ raise RuntimeError(f"iterated too many times, expected 2 times but got {i + 1}")
+
+ assert i == 1
+ assert results.http_response.is_stream_consumed
+
+ @parametrize
+ @pytest.mark.respx(base_url=base_url)
+ def test_path_params_results(self, client: Anthropic) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `message_batch_id` but received ''"):
+ client.messages.batches.with_raw_response.results(
+ "",
+ )
+
+
+class TestAsyncBatches:
+ parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ async def test_method_create(self, async_client: AsyncAnthropic) -> None:
+ batch = await async_client.messages.batches.create(
+ requests=[
+ {
+ "custom_id": "my-custom-id-1",
+ "params": {
+ "max_tokens": 1024,
+ "messages": [
+ {
+ "content": "Hello, world",
+ "role": "user",
+ }
+ ],
+ "model": "claude-3-5-sonnet-20241022",
+ },
+ }
+ ],
+ )
+ assert_matches_type(MessageBatch, batch, path=["response"])
+
+ @parametrize
+ async def test_raw_response_create(self, async_client: AsyncAnthropic) -> None:
+ response = await async_client.messages.batches.with_raw_response.create(
+ requests=[
+ {
+ "custom_id": "my-custom-id-1",
+ "params": {
+ "max_tokens": 1024,
+ "messages": [
+ {
+ "content": "Hello, world",
+ "role": "user",
+ }
+ ],
+ "model": "claude-3-5-sonnet-20241022",
+ },
+ }
+ ],
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ batch = response.parse()
+ assert_matches_type(MessageBatch, batch, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_create(self, async_client: AsyncAnthropic) -> None:
+ async with async_client.messages.batches.with_streaming_response.create(
+ requests=[
+ {
+ "custom_id": "my-custom-id-1",
+ "params": {
+ "max_tokens": 1024,
+ "messages": [
+ {
+ "content": "Hello, world",
+ "role": "user",
+ }
+ ],
+ "model": "claude-3-5-sonnet-20241022",
+ },
+ }
+ ],
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ batch = await response.parse()
+ assert_matches_type(MessageBatch, batch, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_method_retrieve(self, async_client: AsyncAnthropic) -> None:
+ batch = await async_client.messages.batches.retrieve(
+ "message_batch_id",
+ )
+ assert_matches_type(MessageBatch, batch, path=["response"])
+
+ @parametrize
+ async def test_raw_response_retrieve(self, async_client: AsyncAnthropic) -> None:
+ response = await async_client.messages.batches.with_raw_response.retrieve(
+ "message_batch_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ batch = response.parse()
+ assert_matches_type(MessageBatch, batch, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_retrieve(self, async_client: AsyncAnthropic) -> None:
+ async with async_client.messages.batches.with_streaming_response.retrieve(
+ "message_batch_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ batch = await response.parse()
+ assert_matches_type(MessageBatch, batch, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_retrieve(self, async_client: AsyncAnthropic) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `message_batch_id` but received ''"):
+ await async_client.messages.batches.with_raw_response.retrieve(
+ "",
+ )
+
+ @parametrize
+ async def test_method_list(self, async_client: AsyncAnthropic) -> None:
+ batch = await async_client.messages.batches.list()
+ assert_matches_type(AsyncPage[MessageBatch], batch, path=["response"])
+
+ @parametrize
+ async def test_method_list_with_all_params(self, async_client: AsyncAnthropic) -> None:
+ batch = await async_client.messages.batches.list(
+ after_id="after_id",
+ before_id="before_id",
+ limit=1,
+ )
+ assert_matches_type(AsyncPage[MessageBatch], batch, path=["response"])
+
+ @parametrize
+ async def test_raw_response_list(self, async_client: AsyncAnthropic) -> None:
+ response = await async_client.messages.batches.with_raw_response.list()
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ batch = response.parse()
+ assert_matches_type(AsyncPage[MessageBatch], batch, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_list(self, async_client: AsyncAnthropic) -> None:
+ async with async_client.messages.batches.with_streaming_response.list() as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ batch = await response.parse()
+ assert_matches_type(AsyncPage[MessageBatch], batch, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_method_cancel(self, async_client: AsyncAnthropic) -> None:
+ batch = await async_client.messages.batches.cancel(
+ "message_batch_id",
+ )
+ assert_matches_type(MessageBatch, batch, path=["response"])
+
+ @parametrize
+ async def test_raw_response_cancel(self, async_client: AsyncAnthropic) -> None:
+ response = await async_client.messages.batches.with_raw_response.cancel(
+ "message_batch_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ batch = response.parse()
+ assert_matches_type(MessageBatch, batch, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_cancel(self, async_client: AsyncAnthropic) -> None:
+ async with async_client.messages.batches.with_streaming_response.cancel(
+ "message_batch_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ batch = await response.parse()
+ assert_matches_type(MessageBatch, batch, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_cancel(self, async_client: AsyncAnthropic) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `message_batch_id` but received ''"):
+ await async_client.messages.batches.with_raw_response.cancel(
+ "",
+ )
+
+ @pytest.mark.respx(base_url=base_url)
+ @pytest.mark.parametrize("async_client", [False], indirect=True)
+ async def test_method_results(self, async_client: AsyncAnthropic, respx_mock: MockRouter) -> None:
+ respx_mock.get("/v1/messages/batches/message_batch_id").mock(
+ return_value=httpx.Response(200, json={"results_url": "/v1/messages/batches/message_batch_id/results"})
+ )
+ respx_mock.get("/v1/messages/batches/message_batch_id/results").mock(
+ return_value=httpx.Response(
+ 200, content="\n".join([json.dumps({"foo": "bar"}), json.dumps({"bar": "baz"})])
+ )
+ )
+        results = await async_client.messages.batches.results(
+ message_batch_id="message_batch_id",
+ )
+ assert results.http_response is not None
+ assert not results.http_response.is_stream_consumed
+
+ i = -1
+ async for result in results:
+ i += 1
+ if i == 0:
+ assert result.to_dict() == {"foo": "bar"}
+ elif i == 1:
+ assert result.to_dict() == {"bar": "baz"}
+ else:
+ raise RuntimeError(f"iterated too many times, expected 2 times but got {i + 1}")
+
+ assert i == 1
+ assert results.http_response.is_stream_consumed
+
+ @parametrize
+ @pytest.mark.respx(base_url=base_url)
+ async def test_path_params_results(self, async_client: AsyncAnthropic) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `message_batch_id` but received ''"):
+ await async_client.messages.batches.with_raw_response.results(
+ "",
+ )
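Mirroring the calls these tests make, a non-test sketch of creating a batch and later streaming its results might look like this; in practice you would wait until the batch has finished processing before calling `results()`, which this sketch glosses over:

```python
from anthropic import Anthropic

client = Anthropic()

batch = client.messages.batches.create(
    requests=[
        {
            "custom_id": "my-custom-id-1",
            "params": {
                "max_tokens": 1024,
                "messages": [{"role": "user", "content": "Hello, world"}],
                "model": "claude-3-5-sonnet-20241022",
            },
        }
    ],
)

# results() streams JSONL entries that can be iterated lazily, as the
# respx-backed tests above demonstrate with result.to_dict().
for entry in client.messages.batches.results(batch.id):
    print(entry.to_dict())
```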
diff --git a/tests/api_resources/test_messages.py b/tests/api_resources/test_messages.py
index 533918dd..1c1a0df0 100644
--- a/tests/api_resources/test_messages.py
+++ b/tests/api_resources/test_messages.py
@@ -9,7 +9,10 @@
from anthropic import Anthropic, AsyncAnthropic
from tests.utils import assert_matches_type
-from anthropic.types import Message
+from anthropic.types import (
+ Message,
+ MessageTokensCount,
+)
from anthropic.resources.messages import DEPRECATED_MODELS
base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
@@ -50,6 +53,7 @@ def test_method_create_with_all_params_overload_1(self, client: Anthropic) -> No
{
"text": "Today's date is 2024-06-01.",
"type": "text",
+ "cache_control": {"type": "ephemeral"},
}
],
temperature=1,
@@ -73,6 +77,7 @@ def test_method_create_with_all_params_overload_1(self, client: Anthropic) -> No
},
},
"name": "x",
+ "cache_control": {"type": "ephemeral"},
"description": "Get the current weather in a given location",
}
],
@@ -152,6 +157,7 @@ def test_method_create_with_all_params_overload_2(self, client: Anthropic) -> No
{
"text": "Today's date is 2024-06-01.",
"type": "text",
+ "cache_control": {"type": "ephemeral"},
}
],
temperature=1,
@@ -175,6 +181,7 @@ def test_method_create_with_all_params_overload_2(self, client: Anthropic) -> No
},
},
"name": "x",
+ "cache_control": {"type": "ephemeral"},
"description": "Get the current weather in a given location",
}
],
@@ -232,6 +239,99 @@ def test_deprecated_model_warning(self, client: Anthropic) -> None:
model=deprecated_model,
)
+ @parametrize
+ def test_method_count_tokens(self, client: Anthropic) -> None:
+ message = client.messages.count_tokens(
+ messages=[
+ {
+ "content": "string",
+ "role": "user",
+ }
+ ],
+ model="string",
+ )
+ assert_matches_type(MessageTokensCount, message, path=["response"])
+
+ @parametrize
+ def test_method_count_tokens_with_all_params(self, client: Anthropic) -> None:
+ message = client.messages.count_tokens(
+ messages=[
+ {
+ "content": "string",
+ "role": "user",
+ }
+ ],
+ model="string",
+ system=[
+ {
+ "text": "Today's date is 2024-06-01.",
+ "type": "text",
+ "cache_control": {"type": "ephemeral"},
+ }
+ ],
+ tool_choice={
+ "type": "auto",
+ "disable_parallel_tool_use": True,
+ },
+ tools=[
+ {
+ "input_schema": {
+ "type": "object",
+ "properties": {
+ "location": {
+ "description": "The city and state, e.g. San Francisco, CA",
+ "type": "string",
+ },
+ "unit": {
+ "description": "Unit for the output - one of (celsius, fahrenheit)",
+ "type": "string",
+ },
+ },
+ },
+ "name": "x",
+ "cache_control": {"type": "ephemeral"},
+ "description": "Get the current weather in a given location",
+ }
+ ],
+ )
+ assert_matches_type(MessageTokensCount, message, path=["response"])
+
+ @parametrize
+ def test_raw_response_count_tokens(self, client: Anthropic) -> None:
+ response = client.messages.with_raw_response.count_tokens(
+ messages=[
+ {
+ "content": "string",
+ "role": "user",
+ }
+ ],
+ model="string",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ message = response.parse()
+ assert_matches_type(MessageTokensCount, message, path=["response"])
+
+ @parametrize
+ def test_streaming_response_count_tokens(self, client: Anthropic) -> None:
+ with client.messages.with_streaming_response.count_tokens(
+ messages=[
+ {
+ "content": "string",
+ "role": "user",
+ }
+ ],
+ model="string",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ message = response.parse()
+ assert_matches_type(MessageTokensCount, message, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
class TestAsyncMessages:
parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"])
@@ -268,6 +368,7 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn
{
"text": "Today's date is 2024-06-01.",
"type": "text",
+ "cache_control": {"type": "ephemeral"},
}
],
temperature=1,
@@ -291,6 +392,7 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn
},
},
"name": "x",
+ "cache_control": {"type": "ephemeral"},
"description": "Get the current weather in a given location",
}
],
@@ -370,6 +472,7 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn
{
"text": "Today's date is 2024-06-01.",
"type": "text",
+ "cache_control": {"type": "ephemeral"},
}
],
temperature=1,
@@ -393,6 +496,7 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn
},
},
"name": "x",
+ "cache_control": {"type": "ephemeral"},
"description": "Get the current weather in a given location",
}
],
@@ -449,3 +553,96 @@ async def test_deprecated_model_warning(self, async_client: AsyncAnthropic) -> N
messages=[{"role": "user", "content": "Hello"}],
model=deprecated_model,
)
+
+ @parametrize
+ async def test_method_count_tokens(self, async_client: AsyncAnthropic) -> None:
+ message = await async_client.messages.count_tokens(
+ messages=[
+ {
+ "content": "string",
+ "role": "user",
+ }
+ ],
+ model="string",
+ )
+ assert_matches_type(MessageTokensCount, message, path=["response"])
+
+ @parametrize
+ async def test_method_count_tokens_with_all_params(self, async_client: AsyncAnthropic) -> None:
+ message = await async_client.messages.count_tokens(
+ messages=[
+ {
+ "content": "string",
+ "role": "user",
+ }
+ ],
+ model="string",
+ system=[
+ {
+ "text": "Today's date is 2024-06-01.",
+ "type": "text",
+ "cache_control": {"type": "ephemeral"},
+ }
+ ],
+ tool_choice={
+ "type": "auto",
+ "disable_parallel_tool_use": True,
+ },
+ tools=[
+ {
+ "input_schema": {
+ "type": "object",
+ "properties": {
+ "location": {
+ "description": "The city and state, e.g. San Francisco, CA",
+ "type": "string",
+ },
+ "unit": {
+ "description": "Unit for the output - one of (celsius, fahrenheit)",
+ "type": "string",
+ },
+ },
+ },
+ "name": "x",
+ "cache_control": {"type": "ephemeral"},
+ "description": "Get the current weather in a given location",
+ }
+ ],
+ )
+ assert_matches_type(MessageTokensCount, message, path=["response"])
+
+ @parametrize
+ async def test_raw_response_count_tokens(self, async_client: AsyncAnthropic) -> None:
+ response = await async_client.messages.with_raw_response.count_tokens(
+ messages=[
+ {
+ "content": "string",
+ "role": "user",
+ }
+ ],
+ model="string",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ message = response.parse()
+ assert_matches_type(MessageTokensCount, message, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_count_tokens(self, async_client: AsyncAnthropic) -> None:
+ async with async_client.messages.with_streaming_response.count_tokens(
+ messages=[
+ {
+ "content": "string",
+ "role": "user",
+ }
+ ],
+ model="string",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ message = await response.parse()
+ assert_matches_type(MessageTokensCount, message, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
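The new `count_tokens` tests translate directly into a short usage sketch; the model name is illustrative, and the `input_tokens` attribute on `MessageTokensCount` is assumed from the type's purpose rather than shown in this diff:

```python
from anthropic import Anthropic

client = Anthropic()

count = client.messages.count_tokens(
    messages=[{"role": "user", "content": "Hello, world"}],
    model="claude-3-5-sonnet-20241022",
)
# Assumed field: the token count for the prompt, without creating a message.
print(count.input_tokens)
```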
diff --git a/tests/api_resources/test_models.py b/tests/api_resources/test_models.py
new file mode 100644
index 00000000..34b4961a
--- /dev/null
+++ b/tests/api_resources/test_models.py
@@ -0,0 +1,167 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from anthropic import Anthropic, AsyncAnthropic
+from tests.utils import assert_matches_type
+from anthropic.types import ModelInfo
+from anthropic.pagination import SyncPage, AsyncPage
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestModels:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ def test_method_retrieve(self, client: Anthropic) -> None:
+ model = client.models.retrieve(
+ "model_id",
+ )
+ assert_matches_type(ModelInfo, model, path=["response"])
+
+ @parametrize
+ def test_raw_response_retrieve(self, client: Anthropic) -> None:
+ response = client.models.with_raw_response.retrieve(
+ "model_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ model = response.parse()
+ assert_matches_type(ModelInfo, model, path=["response"])
+
+ @parametrize
+ def test_streaming_response_retrieve(self, client: Anthropic) -> None:
+ with client.models.with_streaming_response.retrieve(
+ "model_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ model = response.parse()
+ assert_matches_type(ModelInfo, model, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_retrieve(self, client: Anthropic) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `model_id` but received ''"):
+ client.models.with_raw_response.retrieve(
+ "",
+ )
+
+ @parametrize
+ def test_method_list(self, client: Anthropic) -> None:
+ model = client.models.list()
+ assert_matches_type(SyncPage[ModelInfo], model, path=["response"])
+
+ @parametrize
+ def test_method_list_with_all_params(self, client: Anthropic) -> None:
+ model = client.models.list(
+ after_id="after_id",
+ before_id="before_id",
+ limit=1,
+ )
+ assert_matches_type(SyncPage[ModelInfo], model, path=["response"])
+
+ @parametrize
+ def test_raw_response_list(self, client: Anthropic) -> None:
+ response = client.models.with_raw_response.list()
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ model = response.parse()
+ assert_matches_type(SyncPage[ModelInfo], model, path=["response"])
+
+ @parametrize
+ def test_streaming_response_list(self, client: Anthropic) -> None:
+ with client.models.with_streaming_response.list() as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ model = response.parse()
+ assert_matches_type(SyncPage[ModelInfo], model, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+
+class TestAsyncModels:
+ parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ async def test_method_retrieve(self, async_client: AsyncAnthropic) -> None:
+ model = await async_client.models.retrieve(
+ "model_id",
+ )
+ assert_matches_type(ModelInfo, model, path=["response"])
+
+ @parametrize
+ async def test_raw_response_retrieve(self, async_client: AsyncAnthropic) -> None:
+ response = await async_client.models.with_raw_response.retrieve(
+ "model_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ model = response.parse()
+ assert_matches_type(ModelInfo, model, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_retrieve(self, async_client: AsyncAnthropic) -> None:
+ async with async_client.models.with_streaming_response.retrieve(
+ "model_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ model = await response.parse()
+ assert_matches_type(ModelInfo, model, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_retrieve(self, async_client: AsyncAnthropic) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `model_id` but received ''"):
+ await async_client.models.with_raw_response.retrieve(
+ "",
+ )
+
+ @parametrize
+ async def test_method_list(self, async_client: AsyncAnthropic) -> None:
+ model = await async_client.models.list()
+ assert_matches_type(AsyncPage[ModelInfo], model, path=["response"])
+
+ @parametrize
+ async def test_method_list_with_all_params(self, async_client: AsyncAnthropic) -> None:
+ model = await async_client.models.list(
+ after_id="after_id",
+ before_id="before_id",
+ limit=1,
+ )
+ assert_matches_type(AsyncPage[ModelInfo], model, path=["response"])
+
+ @parametrize
+ async def test_raw_response_list(self, async_client: AsyncAnthropic) -> None:
+ response = await async_client.models.with_raw_response.list()
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ model = response.parse()
+ assert_matches_type(AsyncPage[ModelInfo], model, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_list(self, async_client: AsyncAnthropic) -> None:
+ async with async_client.models.with_streaming_response.list() as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ model = await response.parse()
+ assert_matches_type(AsyncPage[ModelInfo], model, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
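Finally, the non-beta Models tests suggest the following sketch for listing models; iterating the returned `SyncPage` and the `id` attribute on `ModelInfo` are Stainless SDK conventions assumed here rather than spelled out in the diff:

```python
from anthropic import Anthropic

client = Anthropic()

# Iterating the page yields ModelInfo items; limit mirrors the tests above.
for model in client.models.list(limit=20):
    print(model.id)
```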