liuhu
diff --git a/litellm/litellm_core_utils/model_param_helper.py b/litellm/litellm_core_utils/model_param_helper.py
Lines changed: 14 additions & 3 deletions
diff --git a/tests/local_testing/test_caching.py b/tests/local_testing/test_caching.py
Lines changed: 61 additions & 0 deletions
@@ -75,18 +75,29 @@ def _get_all_llm_api_params() -> Set[str]:
  combined_kwargs = combined_kwargs.difference(exclude_kwargs)
  return combined_kwargs
 
+    @staticmethod
+    def get_litellm_provider_specific_params_for_chat_params() -> Set[str]:
+        """
+        Get the provider-specific chat completion params supported by litellm
+
+        These are params that are not picked up from the OpenAI request types.
+        Currently this is only `thinking`. A new set is returned on every call.
+        """
+        return {"thinking"}
+
  @staticmethod
  def _get_litellm_supported_chat_completion_kwargs() -> Set[str]:
  """
  Get the litellm supported chat completion kwargs
 
  This follows the OpenAI API Spec
  """
- all_chat_completion_kwargs = set(
+ # Keys annotated on CompletionCreateParamsNonStreaming (empty if it has no annotations).
+ non_streaming_params: Set[str] = set(
  getattr(CompletionCreateParamsNonStreaming, "__annotations__", {}).keys()
- ).union(
- set(getattr(CompletionCreateParamsStreaming, "__annotations__", {}).keys())
  )
+ # Keys annotated on CompletionCreateParamsStreaming (empty if it has no annotations).
+ streaming_params: Set[str] = set(
+ getattr(CompletionCreateParamsStreaming, "__annotations__", {}).keys()
+ )
+ # Extra params returned by the provider-specific helper (e.g. `thinking`).
+ litellm_provider_specific_params: Set[str] = (
+ ModelParamHelper.get_litellm_provider_specific_params_for_chat_params()
+ )
+ # The supported kwargs are the union of all three sets.
+ all_chat_completion_kwargs: Set[str] = non_streaming_params.union(
+ streaming_params
+ ).union(litellm_provider_specific_params)
  return all_chat_completion_kwargs
 
  @staticmethod
 
@@ -2608,3 +2608,64 @@ def test_caching_with_reasoning_content():
  print(f"response 2: {response_2.model_dump_json(indent=4)}")
  assert response_2._hidden_params["cache_hit"] == True
  assert response_2.choices[0].message.reasoning_content is not None
+
+
+def test_caching_reasoning_args_miss():  # test in memory cache
+    """
+    A call made with `reasoning_effort` must not be reused for the same call made without it
+    """
+    previous_cache = litellm.cache
+    previous_set_verbose = litellm.set_verbose
+    try:
+        litellm.set_verbose = True
+        litellm.cache = Cache()
+        response1 = completion(
+            model="claude-3-7-sonnet-latest",
+            messages=messages,
+            caching=True,
+            reasoning_effort="low",
+            mock_response="My response",
+        )
+        response2 = completion(
+            model="claude-3-7-sonnet-latest",
+            messages=messages,
+            caching=True,
+            mock_response="My response",
+        )
+        print(f"response1: {response1}")
+        print(f"response2: {response2}")
+        # Different ids mean the second call was not answered from the cache.
+        assert response1.id != response2.id
+    finally:
+        # Restore the module-level settings so later tests are not affected.
+        litellm.cache = previous_cache
+        litellm.set_verbose = previous_set_verbose
+
+def test_caching_reasoning_args_hit():  # test in memory cache
+    """
+    Two identical calls that both pass `reasoning_effort` must share one cache entry
+    """
+    previous_cache = litellm.cache
+    previous_set_verbose = litellm.set_verbose
+    try:
+        litellm.set_verbose = True
+        litellm.cache = Cache()
+        response1 = completion(
+            model="claude-3-7-sonnet-latest",
+            messages=messages,
+            caching=True,
+            reasoning_effort="low",
+            mock_response="My response",
+        )
+        response2 = completion(
+            model="claude-3-7-sonnet-latest",
+            messages=messages,
+            caching=True,
+            reasoning_effort="low",
+            mock_response="My response",
+        )
+        print(f"response1: {response1}")
+        print(f"response2: {response2}")
+        # Equal ids mean the second call was answered from the cache.
+        assert response1.id == response2.id
+    finally:
+        # Restore the module-level settings so later tests are not affected.
+        litellm.cache = previous_cache
+        litellm.set_verbose = previous_set_verbose
+ 
+def test_caching_thinking_args_miss():  # test in memory cache
+    """
+    A call made with `thinking` must not be reused for the same call made without it
+    """
+    previous_cache = litellm.cache
+    previous_set_verbose = litellm.set_verbose
+    try:
+        litellm.set_verbose = True
+        litellm.cache = Cache()
+        response1 = completion(
+            model="claude-3-7-sonnet-latest",
+            messages=messages,
+            caching=True,
+            thinking={"type": "enabled", "budget_tokens": 1024},
+            mock_response="My response",
+        )
+        response2 = completion(
+            model="claude-3-7-sonnet-latest",
+            messages=messages,
+            caching=True,
+            mock_response="My response",
+        )
+        print(f"response1: {response1}")
+        print(f"response2: {response2}")
+        # Different ids mean the second call was not answered from the cache.
+        assert response1.id != response2.id
+    finally:
+        # Restore the module-level settings so later tests are not affected.
+        litellm.cache = previous_cache
+        litellm.set_verbose = previous_set_verbose
+
+def test_caching_thinking_args_hit():  # test in memory cache
+    """
+    Two identical calls that both pass the same `thinking` config must share one cache entry
+    """
+    previous_cache = litellm.cache
+    previous_set_verbose = litellm.set_verbose
+    try:
+        litellm.set_verbose = True
+        litellm.cache = Cache()
+        response1 = completion(
+            model="claude-3-7-sonnet-latest",
+            messages=messages,
+            caching=True,
+            thinking={"type": "enabled", "budget_tokens": 1024},
+            mock_response="My response",
+        )
+        response2 = completion(
+            model="claude-3-7-sonnet-latest",
+            messages=messages,
+            caching=True,
+            thinking={"type": "enabled", "budget_tokens": 1024},
+            mock_response="My response",
+        )
+        print(f"response1: {response1}")
+        print(f"response2: {response2}")
+        # Equal ids mean the second call was answered from the cache.
+        assert response1.id == response2.id
+    finally:
+        # Restore the module-level settings so later tests are not affected.
+        litellm.cache = previous_cache
+        litellm.set_verbose = previous_set_verbose