Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/models/anthropic.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,9 @@ Anthropic supports [prompt caching](https://docs.anthropic.com/en/docs/build-wit
3. **Cache Tool Definitions**: Set [`AnthropicModelSettings.anthropic_cache_tool_definitions`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_tool_definitions] to `True` (uses 5m TTL by default) or specify `'5m'` / `'1h'` directly
4. **Cache All Messages**: Set [`AnthropicModelSettings.anthropic_cache_messages`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_messages] to `True` to automatically cache all messages

!!! note "Amazon Bedrock"
When using `AsyncAnthropicBedrock`, the TTL parameter is automatically omitted from all cache control settings (including `CachePoint`, `anthropic_cache_instructions`, `anthropic_cache_tool_definitions`, and `anthropic_cache_messages`) because Bedrock doesn't support explicit TTL.

### Example 1: Automatic Message Caching

Use `anthropic_cache_messages` to automatically cache all messages up to and including the newest user message:
Expand Down
2 changes: 1 addition & 1 deletion pydantic_ai_slim/pydantic_ai/messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -653,7 +653,7 @@ class CachePoint:
Supported by:
* Anthropic. See https://docs.claude.com/en/docs/build-with-claude/prompt-caching#1-hour-cache-duration for more information."""
* Anthropic (automatically omitted for Bedrock, as it does not support explicit TTL). See https://docs.claude.com/en/docs/build-with-claude/prompt-caching#1-hour-cache-duration for more information."""


MultiModalContent = ImageUrl | AudioUrl | DocumentUrl | VideoUrl | BinaryContent
Expand Down
35 changes: 29 additions & 6 deletions pydantic_ai_slim/pydantic_ai/models/anthropic.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ class AnthropicModelSettings(ModelSettings, total=False):
When enabled, the last tool in the `tools` array will have `cache_control` set,
allowing Anthropic to cache tool definitions and reduce costs.
If `True`, uses TTL='5m'. You can also specify '5m' or '1h' directly.
TTL is automatically omitted for Bedrock, as it does not support explicit TTL.
See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information.
"""

Expand All @@ -181,6 +182,7 @@ class AnthropicModelSettings(ModelSettings, total=False):
When enabled, the last system prompt will have `cache_control` set,
allowing Anthropic to cache system instructions and reduce costs.
If `True`, uses TTL='5m'. You can also specify '5m' or '1h' directly.
TTL is automatically omitted for Bedrock, as it does not support explicit TTL.
See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information.
"""

Expand All @@ -191,6 +193,7 @@ class AnthropicModelSettings(ModelSettings, total=False):
in the final user message, which is useful for caching conversation history
or context in multi-turn conversations.
If `True`, uses TTL='5m'. You can also specify '5m' or '1h' directly.
TTL is automatically omitted for Bedrock, as it does not support explicit TTL.

Note: Uses 1 of Anthropic's 4 available cache points per request. Any additional CachePoint
markers in messages will be automatically limited to respect the 4-cache-point maximum.
Expand Down Expand Up @@ -565,7 +568,7 @@ def _get_tools(
# If True, use '5m'; otherwise use the specified ttl value
ttl: Literal['5m', '1h'] = '5m' if cache_tool_defs is True else cache_tool_defs
last_tool = tools[-1]
last_tool['cache_control'] = BetaCacheControlEphemeralParam(type='ephemeral', ttl=ttl)
last_tool['cache_control'] = self._build_cache_control(ttl)

return tools

Expand Down Expand Up @@ -867,7 +870,7 @@ async def _map_message( # noqa: C901
BetaTextBlockParam(
text=content,
type='text',
cache_control=BetaCacheControlEphemeralParam(type='ephemeral', ttl=ttl),
cache_control=self._build_cache_control(ttl),
)
]
else:
Expand All @@ -883,7 +886,7 @@ async def _map_message( # noqa: C901
BetaTextBlockParam(
type='text',
text=system_prompt,
cache_control=BetaCacheControlEphemeralParam(type='ephemeral', ttl=ttl),
cache_control=self._build_cache_control(ttl),
)
]
return system_prompt_blocks, anthropic_messages
Expand Down Expand Up @@ -959,11 +962,31 @@ def _limit_cache_points(
# Exceeded limit, remove this cache point
del block_dict['cache_control']

@staticmethod
def _add_cache_control_to_last_param(params: list[BetaContentBlockParam], ttl: Literal['5m', '1h'] = '5m') -> None:
def _build_cache_control(self, ttl: Literal['5m', '1h'] = '5m') -> BetaCacheControlEphemeralParam:
    """Construct the ephemeral cache control dict appropriate for the active client.

    Args:
        ttl: The cache time-to-live ('5m' or '1h'). Dropped entirely for Bedrock
            clients, which don't accept an explicit TTL.

    Returns:
        A cache control dict suitable for the current client type.
    """
    if not isinstance(self.client, AsyncAnthropicBedrock):
        return BetaCacheControlEphemeralParam(type='ephemeral', ttl=ttl)
    # Bedrock rejects an explicit TTL; the cast keeps the type checker satisfied.
    return cast(BetaCacheControlEphemeralParam, {'type': 'ephemeral'})

def _add_cache_control_to_last_param(
self, params: list[BetaContentBlockParam], ttl: Literal['5m', '1h'] = '5m'
) -> None:
"""Add cache control to the last content block param.

See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information.

Args:
params: List of content block params to modify.
ttl: The cache time-to-live ('5m' or '1h'). This is automatically ignored for
Bedrock clients, which don't support explicit TTL parameters.
"""
if not params:
raise UserError(
Expand All @@ -981,7 +1004,7 @@ def _add_cache_control_to_last_param(params: list[BetaContentBlockParam], ttl: L
raise UserError(f'Cache control not supported for param type: {last_param["type"]}')

# Add cache_control to the last param
last_param['cache_control'] = BetaCacheControlEphemeralParam(type='ephemeral', ttl=ttl)
last_param['cache_control'] = self._build_cache_control(ttl)

@staticmethod
async def _map_user_prompt(
Expand Down
128 changes: 122 additions & 6 deletions tests/models/test_anthropic.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,10 @@ async def test_async_request_prompt_caching(allow_model_requests: None):


async def test_cache_point_adds_cache_control(allow_model_requests: None):
"""Test that CachePoint correctly adds cache_control to content blocks."""
"""Test that CachePoint correctly adds cache_control to content blocks.

By default, CachePoint uses ttl='5m'. For non-Bedrock clients, the ttl field is included.
"""
c = completion_message(
[BetaTextBlock(text='response', type='text')],
usage=BetaUsage(input_tokens=3, output_tokens=5),
Expand All @@ -317,10 +320,10 @@ async def test_cache_point_adds_cache_control(allow_model_requests: None):
m = AnthropicModel('claude-haiku-4-5', provider=AnthropicProvider(anthropic_client=mock_client))
agent = Agent(m)

# Test with CachePoint after text content
# Test with CachePoint after text content (default ttl='5m')
await agent.run(['Some context to cache', CachePoint(), 'Now the question'])

# Verify cache_control was added to the right content block
# Verify cache_control was added with default ttl='5m'
completion_kwargs = get_mock_chat_completion_kwargs(mock_client)[0]
messages = completion_kwargs['messages']
assert messages == snapshot(
Expand Down Expand Up @@ -355,6 +358,7 @@ async def test_cache_point_multiple_markers(allow_model_requests: None):
completion_kwargs = get_mock_chat_completion_kwargs(mock_client)[0]
content = completion_kwargs['messages'][0]['content']

# Default ttl='5m' for non-Bedrock clients
assert content == snapshot(
[
{'text': 'First chunk', 'type': 'text', 'cache_control': {'type': 'ephemeral', 'ttl': '5m'}},
Expand Down Expand Up @@ -402,6 +406,7 @@ async def test_cache_point_with_image_content(allow_model_requests: None):
completion_kwargs = get_mock_chat_completion_kwargs(mock_client)[0]
content = completion_kwargs['messages'][0]['content']

# Default ttl='5m' for non-Bedrock clients
assert content == snapshot(
[
{
Expand Down Expand Up @@ -434,16 +439,127 @@ async def test_cache_point_in_otel_message_parts(allow_model_requests: None):

def test_cache_control_unsupported_param_type():
    """Cache control must raise a UserError for param types it cannot cache."""
    from unittest.mock import MagicMock

    from pydantic_ai.exceptions import UserError
    from pydantic_ai.models.anthropic import AnthropicModel

    # Build a model around a mock that looks like a plain AsyncAnthropic client
    fake_client = MagicMock()
    fake_client.__class__.__name__ = 'AsyncAnthropic'
    fake_client.base_url = 'https://api.anthropic.com'
    model = AnthropicModel('claude-haiku-4-5', provider=AnthropicProvider(anthropic_client=fake_client))

    # A 'thinking' block is not a cacheable param type
    blocks: list[dict[str, Any]] = [{'type': 'thinking', 'source': {'data': 'test'}}]

    with pytest.raises(UserError, match='Cache control not supported for param type: thinking'):
        model._add_cache_control_to_last_param(blocks)  # type: ignore[arg-type] # Testing internal method


def test_build_cache_control_bedrock_omits_ttl():
    """_build_cache_control should silently drop the TTL when the client is Bedrock."""
    from unittest.mock import MagicMock

    from anthropic import AsyncAnthropicBedrock

    # spec= lets the mock satisfy the isinstance(..., AsyncAnthropicBedrock) check
    bedrock_client = MagicMock(spec=AsyncAnthropicBedrock)
    bedrock_client.base_url = 'https://bedrock.amazonaws.com'

    model = AnthropicModel('claude-haiku-4-5', provider=AnthropicProvider(anthropic_client=bedrock_client))

    # Neither requested TTL value should survive into the built cache control dict
    assert model._build_cache_control('5m') == {'type': 'ephemeral'}  # pyright: ignore[reportPrivateUsage]
    assert model._build_cache_control('1h') == {'type': 'ephemeral'}  # pyright: ignore[reportPrivateUsage]


def test_build_cache_control_standard_client_includes_ttl():
    """_build_cache_control should keep the TTL for regular Anthropic clients."""
    from unittest.mock import MagicMock

    # A mock dressed up as a standard AsyncAnthropic client
    plain_client = MagicMock()
    plain_client.__class__.__name__ = 'AsyncAnthropic'
    plain_client.base_url = 'https://api.anthropic.com'

    model = AnthropicModel('claude-haiku-4-5', provider=AnthropicProvider(anthropic_client=plain_client))

    # The requested TTL must be passed through verbatim
    assert model._build_cache_control('5m') == {'type': 'ephemeral', 'ttl': '5m'}  # pyright: ignore[reportPrivateUsage]
    assert model._build_cache_control('1h') == {'type': 'ephemeral', 'ttl': '1h'}  # pyright: ignore[reportPrivateUsage]


async def test_cache_point_with_5m_ttl(allow_model_requests: None):
    """An explicit CachePoint(ttl='5m') should emit cache_control carrying that ttl."""
    response = completion_message(
        [BetaTextBlock(text='response', type='text')],
        usage=BetaUsage(input_tokens=3, output_tokens=5),
    )
    mock_client = MockAnthropic.create_mock(response)
    model = AnthropicModel('claude-haiku-4-5', provider=AnthropicProvider(anthropic_client=mock_client))
    agent = Agent(model)

    # Place an explicit 5m cache point between the context and the question
    await agent.run(['Some context to cache', CachePoint(ttl='5m'), 'Now the question'])

    # The block preceding the cache point must carry cache_control with ttl='5m'
    request_kwargs = get_mock_chat_completion_kwargs(mock_client)[0]
    assert request_kwargs['messages'] == snapshot(
        [
            {
                'role': 'user',
                'content': [
                    {
                        'text': 'Some context to cache',
                        'type': 'text',
                        'cache_control': {'type': 'ephemeral', 'ttl': '5m'},
                    },
                    {'text': 'Now the question', 'type': 'text'},
                ],
            }
        ]
    )


async def test_cache_point_with_1h_ttl(allow_model_requests: None):
    """CachePoint(ttl='1h') should set the long-lived TTL on the cached block."""
    response = completion_message(
        [BetaTextBlock(text='response', type='text')],
        usage=BetaUsage(input_tokens=3, output_tokens=5),
    )
    mock_client = MockAnthropic.create_mock(response)
    model = AnthropicModel('claude-haiku-4-5', provider=AnthropicProvider(anthropic_client=mock_client))
    agent = Agent(model)

    # Place a 1h cache point between the context and the question
    await agent.run(['Some context to cache', CachePoint(ttl='1h'), 'Now the question'])

    # The block preceding the cache point must carry cache_control with ttl='1h'
    request_kwargs = get_mock_chat_completion_kwargs(mock_client)[0]
    assert request_kwargs['messages'] == snapshot(
        [
            {
                'role': 'user',
                'content': [
                    {
                        'text': 'Some context to cache',
                        'type': 'text',
                        'cache_control': {'type': 'ephemeral', 'ttl': '1h'},
                    },
                    {'text': 'Now the question', 'type': 'text'},
                ],
            }
        ]
    )


async def test_anthropic_cache_tools(allow_model_requests: None):
Expand Down