Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/models/anthropic.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,9 @@ Anthropic supports [prompt caching](https://docs.anthropic.com/en/docs/build-wit
3. **Cache Tool Definitions**: Set [`AnthropicModelSettings.anthropic_cache_tool_definitions`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_tool_definitions] to `True` (uses 5m TTL by default) or specify `'5m'` / `'1h'` directly
4. **Cache All Messages**: Set [`AnthropicModelSettings.anthropic_cache_messages`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_messages] to `True` to automatically cache all messages

!!! note "Amazon Bedrock"
When using `AsyncAnthropicBedrock`, the TTL parameter is automatically omitted from all cache control settings (including `CachePoint`, `anthropic_cache_instructions`, `anthropic_cache_tool_definitions`, and `anthropic_cache_messages`) because Bedrock doesn't support explicit TTL.

### Example 1: Automatic Message Caching

Use `anthropic_cache_messages` to automatically cache all messages up to and including the newest user message:
Expand Down
2 changes: 1 addition & 1 deletion pydantic_ai_slim/pydantic_ai/messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -653,7 +653,7 @@ class CachePoint:
Supported by:
* Anthropic. See https://docs.claude.com/en/docs/build-with-claude/prompt-caching#1-hour-cache-duration for more information."""
* Anthropic (automatically omitted for Bedrock, as it does not support explicit TTL). See https://docs.claude.com/en/docs/build-with-claude/prompt-caching#1-hour-cache-duration for more information."""


MultiModalContent = ImageUrl | AudioUrl | DocumentUrl | VideoUrl | BinaryContent
Expand Down
35 changes: 29 additions & 6 deletions pydantic_ai_slim/pydantic_ai/models/anthropic.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ class AnthropicModelSettings(ModelSettings, total=False):
When enabled, the last tool in the `tools` array will have `cache_control` set,
allowing Anthropic to cache tool definitions and reduce costs.
If `True`, uses TTL='5m'. You can also specify '5m' or '1h' directly.
TTL is automatically omitted for Bedrock, as it does not support explicit TTL.
See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information.
"""

Expand All @@ -181,6 +182,7 @@ class AnthropicModelSettings(ModelSettings, total=False):
When enabled, the last system prompt will have `cache_control` set,
allowing Anthropic to cache system instructions and reduce costs.
If `True`, uses TTL='5m'. You can also specify '5m' or '1h' directly.
TTL is automatically omitted for Bedrock, as it does not support explicit TTL.
See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information.
"""

Expand All @@ -191,6 +193,7 @@ class AnthropicModelSettings(ModelSettings, total=False):
in the final user message, which is useful for caching conversation history
or context in multi-turn conversations.
If `True`, uses TTL='5m'. You can also specify '5m' or '1h' directly.
TTL is automatically omitted for Bedrock, as it does not support explicit TTL.

Note: Uses 1 of Anthropic's 4 available cache points per request. Any additional CachePoint
markers in messages will be automatically limited to respect the 4-cache-point maximum.
Expand Down Expand Up @@ -565,7 +568,7 @@ def _get_tools(
# If True, use '5m'; otherwise use the specified ttl value
ttl: Literal['5m', '1h'] = '5m' if cache_tool_defs is True else cache_tool_defs
last_tool = tools[-1]
last_tool['cache_control'] = BetaCacheControlEphemeralParam(type='ephemeral', ttl=ttl)
last_tool['cache_control'] = self._build_cache_control(ttl)

return tools

Expand Down Expand Up @@ -867,7 +870,7 @@ async def _map_message( # noqa: C901
BetaTextBlockParam(
text=content,
type='text',
cache_control=BetaCacheControlEphemeralParam(type='ephemeral', ttl=ttl),
cache_control=self._build_cache_control(ttl),
)
]
else:
Expand All @@ -883,7 +886,7 @@ async def _map_message( # noqa: C901
BetaTextBlockParam(
type='text',
text=system_prompt,
cache_control=BetaCacheControlEphemeralParam(type='ephemeral', ttl=ttl),
cache_control=self._build_cache_control(ttl),
)
]
return system_prompt_blocks, anthropic_messages
Expand Down Expand Up @@ -959,11 +962,31 @@ def _limit_cache_points(
# Exceeded limit, remove this cache point
del block_dict['cache_control']

@staticmethod
def _add_cache_control_to_last_param(params: list[BetaContentBlockParam], ttl: Literal['5m', '1h'] = '5m') -> None:
def _build_cache_control(self, ttl: Literal['5m', '1h'] = '5m') -> BetaCacheControlEphemeralParam:
    """Construct the ephemeral cache control dict appropriate for the active client.

    Args:
        ttl: The cache time-to-live ('5m' or '1h'). Dropped entirely for Bedrock
            clients, which don't accept an explicit TTL.

    Returns:
        A cache control dict suitable for the current client type.
    """
    if not isinstance(self.client, AsyncAnthropicBedrock):
        return BetaCacheControlEphemeralParam(type='ephemeral', ttl=ttl)
    # Bedrock rejects an explicit TTL; the cast keeps the type checker satisfied.
    return cast(BetaCacheControlEphemeralParam, {'type': 'ephemeral'})

def _add_cache_control_to_last_param(
self, params: list[BetaContentBlockParam], ttl: Literal['5m', '1h'] = '5m'
) -> None:
"""Add cache control to the last content block param.

See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information.

Args:
params: List of content block params to modify.
ttl: The cache time-to-live ('5m' or '1h'). This is automatically ignored for
Bedrock clients, which don't support explicit TTL parameters.
"""
if not params:
raise UserError(
Expand All @@ -981,7 +1004,7 @@ def _add_cache_control_to_last_param(params: list[BetaContentBlockParam], ttl: L
raise UserError(f'Cache control not supported for param type: {last_param["type"]}')

# Add cache_control to the last param
last_param['cache_control'] = BetaCacheControlEphemeralParam(type='ephemeral', ttl=ttl)
last_param['cache_control'] = self._build_cache_control(ttl)

@staticmethod
async def _map_user_prompt(
Expand Down
128 changes: 122 additions & 6 deletions tests/models/test_anthropic.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,10 @@ async def test_async_request_prompt_caching(allow_model_requests: None):


async def test_cache_point_adds_cache_control(allow_model_requests: None):
"""Test that CachePoint correctly adds cache_control to content blocks."""
"""Test that CachePoint correctly adds cache_control to content blocks.

By default, CachePoint uses ttl='5m'. For non-Bedrock clients, the ttl field is included.
"""
c = completion_message(
[BetaTextBlock(text='response', type='text')],
usage=BetaUsage(input_tokens=3, output_tokens=5),
Expand All @@ -317,10 +320,10 @@ async def test_cache_point_adds_cache_control(allow_model_requests: None):
m = AnthropicModel('claude-haiku-4-5', provider=AnthropicProvider(anthropic_client=mock_client))
agent = Agent(m)

# Test with CachePoint after text content
# Test with CachePoint after text content (default ttl='5m')
await agent.run(['Some context to cache', CachePoint(), 'Now the question'])

# Verify cache_control was added to the right content block
# Verify cache_control was added with default ttl='5m'
completion_kwargs = get_mock_chat_completion_kwargs(mock_client)[0]
messages = completion_kwargs['messages']
assert messages == snapshot(
Expand Down Expand Up @@ -355,6 +358,7 @@ async def test_cache_point_multiple_markers(allow_model_requests: None):
completion_kwargs = get_mock_chat_completion_kwargs(mock_client)[0]
content = completion_kwargs['messages'][0]['content']

# Default ttl='5m' for non-Bedrock clients
assert content == snapshot(
[
{'text': 'First chunk', 'type': 'text', 'cache_control': {'type': 'ephemeral', 'ttl': '5m'}},
Expand Down Expand Up @@ -402,6 +406,7 @@ async def test_cache_point_with_image_content(allow_model_requests: None):
completion_kwargs = get_mock_chat_completion_kwargs(mock_client)[0]
content = completion_kwargs['messages'][0]['content']

# Default ttl='5m' for non-Bedrock clients
assert content == snapshot(
[
{
Expand Down Expand Up @@ -434,16 +439,127 @@ async def test_cache_point_in_otel_message_parts(allow_model_requests: None):

def test_cache_control_unsupported_param_type():
    """Cache control must raise a UserError for param types it cannot cache."""
    from unittest.mock import MagicMock

    from pydantic_ai.exceptions import UserError
    from pydantic_ai.models.anthropic import AnthropicModel

    # Build a model around a mock that looks like a plain AsyncAnthropic client
    fake_client = MagicMock()
    fake_client.__class__.__name__ = 'AsyncAnthropic'
    fake_client.base_url = 'https://api.anthropic.com'
    model = AnthropicModel('claude-haiku-4-5', provider=AnthropicProvider(anthropic_client=fake_client))

    # A 'thinking' block is not a cacheable param type
    blocks: list[dict[str, Any]] = [{'type': 'thinking', 'source': {'data': 'test'}}]

    with pytest.raises(UserError, match='Cache control not supported for param type: thinking'):
        model._add_cache_control_to_last_param(blocks)  # type: ignore[arg-type] # Testing internal method


def test_build_cache_control_bedrock_omits_ttl():
    """_build_cache_control should silently drop the TTL when the client is Bedrock."""
    from unittest.mock import MagicMock

    from anthropic import AsyncAnthropicBedrock

    # spec= lets the mock satisfy the isinstance(..., AsyncAnthropicBedrock) check
    bedrock_client = MagicMock(spec=AsyncAnthropicBedrock)
    bedrock_client.base_url = 'https://bedrock.amazonaws.com'

    model = AnthropicModel('claude-haiku-4-5', provider=AnthropicProvider(anthropic_client=bedrock_client))

    # Neither requested TTL value should survive into the built cache control dict
    assert model._build_cache_control('5m') == {'type': 'ephemeral'}  # pyright: ignore[reportPrivateUsage]
    assert model._build_cache_control('1h') == {'type': 'ephemeral'}  # pyright: ignore[reportPrivateUsage]


def test_build_cache_control_standard_client_includes_ttl():
    """_build_cache_control should keep the TTL for regular Anthropic clients."""
    from unittest.mock import MagicMock

    # A mock dressed up as a standard AsyncAnthropic client
    plain_client = MagicMock()
    plain_client.__class__.__name__ = 'AsyncAnthropic'
    plain_client.base_url = 'https://api.anthropic.com'

    model = AnthropicModel('claude-haiku-4-5', provider=AnthropicProvider(anthropic_client=plain_client))

    # The requested TTL must be passed through verbatim
    assert model._build_cache_control('5m') == {'type': 'ephemeral', 'ttl': '5m'}  # pyright: ignore[reportPrivateUsage]
    assert model._build_cache_control('1h') == {'type': 'ephemeral', 'ttl': '1h'}  # pyright: ignore[reportPrivateUsage]


async def test_cache_point_with_5m_ttl(allow_model_requests: None):
    """An explicit CachePoint(ttl='5m') should emit cache_control carrying that ttl."""
    response = completion_message(
        [BetaTextBlock(text='response', type='text')],
        usage=BetaUsage(input_tokens=3, output_tokens=5),
    )
    mock_client = MockAnthropic.create_mock(response)
    model = AnthropicModel('claude-haiku-4-5', provider=AnthropicProvider(anthropic_client=mock_client))
    agent = Agent(model)

    # Place an explicit 5m cache point between the context and the question
    await agent.run(['Some context to cache', CachePoint(ttl='5m'), 'Now the question'])

    # The block preceding the cache point must carry cache_control with ttl='5m'
    request_kwargs = get_mock_chat_completion_kwargs(mock_client)[0]
    assert request_kwargs['messages'] == snapshot(
        [
            {
                'role': 'user',
                'content': [
                    {
                        'text': 'Some context to cache',
                        'type': 'text',
                        'cache_control': {'type': 'ephemeral', 'ttl': '5m'},
                    },
                    {'text': 'Now the question', 'type': 'text'},
                ],
            }
        ]
    )


async def test_cache_point_with_1h_ttl(allow_model_requests: None):
    """CachePoint(ttl='1h') should set the long-lived TTL on the cached block."""
    response = completion_message(
        [BetaTextBlock(text='response', type='text')],
        usage=BetaUsage(input_tokens=3, output_tokens=5),
    )
    mock_client = MockAnthropic.create_mock(response)
    model = AnthropicModel('claude-haiku-4-5', provider=AnthropicProvider(anthropic_client=mock_client))
    agent = Agent(model)

    # Place a 1h cache point between the context and the question
    await agent.run(['Some context to cache', CachePoint(ttl='1h'), 'Now the question'])

    # The block preceding the cache point must carry cache_control with ttl='1h'
    request_kwargs = get_mock_chat_completion_kwargs(mock_client)[0]
    assert request_kwargs['messages'] == snapshot(
        [
            {
                'role': 'user',
                'content': [
                    {
                        'text': 'Some context to cache',
                        'type': 'text',
                        'cache_control': {'type': 'ephemeral', 'ttl': '1h'},
                    },
                    {'text': 'Now the question', 'type': 'text'},
                ],
            }
        ]
    )


async def test_anthropic_cache_tools(allow_model_requests: None):
Expand Down