LiteLLM Minor Fixes & Improvements (12/05/2024) (#7037)

* fix(together_ai/chat): only return response_format + tools for supported models

Fixes https://github.com/BerriAI/litellm/issues/6972
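A rough sketch of the call path this affects (model id and schema are illustrative, not taken from this commit): with the fix, response_format / tools are only forwarded to Together AI models that support them.

import litellm

# Illustrative only: response_format is passed through for supported Together AI
# models and dropped for the rest, instead of triggering a provider-side error.
response = litellm.completion(
    model="together_ai/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",  # example model id
    messages=[{"role": "user", "content": "Reply with a JSON object containing a 'city' key."}],
    response_format={"type": "json_object"},
)
print(response.choices[0].message.content)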

* feat(bedrock/rerank): initial working commit for bedrock rerank api support

Closes https://github.com/BerriAI/litellm/issues/7021

* feat(bedrock/rerank): async bedrock rerank api support

Addresses https://github.com/BerriAI/litellm/issues/7021
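Rough usage sketch for the new rerank route, sync and async (the Bedrock model id below is a placeholder; check the Bedrock docs for the real rerank model ids):

import asyncio

import litellm

docs = ["Paris is the capital of France.", "Berlin is the capital of Germany."]

# Synchronous rerank against a Bedrock rerank model (placeholder model id).
result = litellm.rerank(
    model="bedrock/cohere.rerank-v3-5:0",  # hypothetical model id
    query="What is the capital of France?",
    documents=docs,
    top_n=1,
)
print(result.results)

# Async variant added in this commit.
async def main():
    result = await litellm.arerank(
        model="bedrock/cohere.rerank-v3-5:0",  # hypothetical model id
        query="What is the capital of France?",
        documents=docs,
        top_n=1,
    )
    print(result.results)

asyncio.run(main())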

* build(model_prices_and_context_window.json): add 'supports_prompt_caching' for bedrock models + clean up cross-region entries from the model list (duplicate information that led to inconsistencies)
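The new flag is queryable through the existing helper in litellm.utils (the Bedrock model id below is an example, not a statement of which entries were updated):

from litellm.utils import supports_prompt_caching

# Reads the 'supports_prompt_caching' field from model_prices_and_context_window.json.
print(supports_prompt_caching(model="anthropic/claude-3-5-sonnet-20240620"))
print(supports_prompt_caching(model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0"))  # example Bedrock model id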

* docs(json_mode.md): clarify model support for json schema

Closes https://github.com/BerriAI/litellm/issues/6998
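A minimal sketch of checking and using JSON schema support (model names are examples only and do not restate the docs' support matrix; supports_response_schema is the existing helper in litellm.utils):

import litellm
from litellm.utils import supports_response_schema

# Check whether the model advertises response_schema / json_schema support.
if supports_response_schema(model="gemini-1.5-pro", custom_llm_provider="vertex_ai"):
    resp = litellm.completion(
        model="vertex_ai/gemini-1.5-pro",
        messages=[{"role": "user", "content": "List three cookie recipes."}],
        response_format={
            "type": "json_schema",
            "json_schema": {
                "name": "recipes",
                "schema": {
                    "type": "object",
                    "properties": {
                        "recipes": {"type": "array", "items": {"type": "string"}}
                    },
                    "required": ["recipes"],
                },
            },
        },
    )
    print(resp.choices[0].message.content)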

* fix(_service_logger.py): handle dd callback in list

ensure failed spend tracking is logged to datadog
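A sketch of the configuration shape involved (the actual fix lives in _service_logger.py; this only illustrates the callback-in-a-list setup):

import litellm

# Callbacks supplied as a list rather than a single string; the fix makes sure the
# "datadog" entry in this shape is still picked up so failed spend tracking is emitted.
litellm.success_callback = ["datadog"]
litellm.failure_callback = ["datadog"]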

* feat(converse_transformation.py): translate from anthropic format to bedrock format

Closes https://github.com/BerriAI/litellm/issues/7030
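The exact repro in #7030 isn't restated here; as a general, hedged sketch, Anthropic-style content blocks sent through litellm to a Bedrock model are rewritten into Bedrock's Converse request format by this transformation (model id is an example):

import litellm

response = litellm.completion(
    model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0",  # example model id
    messages=[
        {
            "role": "user",
            # Anthropic-style content blocks; translated to Bedrock Converse format internally.
            "content": [{"type": "text", "text": "Summarize this contract in one sentence."}],
        }
    ],
)
print(response.choices[0].message.content)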

* fix: fix linting errors

* test: fix test
Krish Dholakia 2024-12-05 00:02:31 -08:00 committed by GitHub
parent 12dfd14b52
commit 61b35c12bb
24 changed files with 858 additions and 400 deletions

@@ -38,76 +38,6 @@ def _usage_format_tests(usage: litellm.Usage):
    assert usage.prompt_tokens > usage.prompt_tokens_details.cached_tokens


@pytest.mark.parametrize(
    "model",
    [
        "anthropic/claude-3-5-sonnet-20240620",
        # "openai/gpt-4o",
        # "deepseek/deepseek-chat",
    ],
)
def test_prompt_caching_model(model):
    try:
        for _ in range(2):
            response = litellm.completion(
                model=model,
                messages=[
                    # System Message
                    {
                        "role": "system",
                        "content": [
                            {
                                "type": "text",
                                "text": "Here is the full text of a complex legal agreement"
                                * 400,
                                "cache_control": {"type": "ephemeral"},
                            }
                        ],
                    },
                    # marked for caching with the cache_control parameter, so that this checkpoint can read from the previous cache.
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "text",
                                "text": "What are the key terms and conditions in this agreement?",
                                "cache_control": {"type": "ephemeral"},
                            }
                        ],
                    },
                    {
                        "role": "assistant",
                        "content": "Certainly! the key terms and conditions are the following: the contract is 1 year long for $10/mo",
                    },
                    # The final turn is marked with cache-control, for continuing in followups.
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "text",
                                "text": "What are the key terms and conditions in this agreement?",
                                "cache_control": {"type": "ephemeral"},
                            }
                        ],
                    },
                ],
                temperature=0.2,
                max_tokens=10,
            )

            _usage_format_tests(response.usage)

            print("response=", response)
            print("response.usage=", response.usage)

            _usage_format_tests(response.usage)

            assert "prompt_tokens_details" in response.usage
            assert response.usage.prompt_tokens_details.cached_tokens > 0
    except litellm.InternalServerError:
        pass


def test_supports_prompt_caching():
    from litellm.utils import supports_prompt_caching