From f2c0a31e3cb46c3d9586af454fef77adc670f64f Mon Sep 17 00:00:00 2001 From: Krish Dholakia Date: Sat, 5 Oct 2024 18:59:11 -0400 Subject: [PATCH] LiteLLM Minor Fixes & Improvements (10/05/2024) (#6083) * docs(prompt_caching.md): add prompt caching cost calc example to docs * docs(prompt_caching.md): add proxy examples to docs * feat(utils.py): expose new helper `supports_prompt_caching()` to check if a model supports prompt caching * docs(prompt_caching.md): add docs on checking model support for prompt caching * build: fix invalid json --- .../docs/completion/prompt_caching.md | 305 +++++++++++++++++- ...odel_prices_and_context_window_backup.json | 69 ++-- litellm/proxy/_new_secret_config.yaml | 12 +- litellm/utils.py | 34 ++ model_prices_and_context_window.json | 69 ++-- tests/local_testing/test_get_model_file.py | 21 +- tests/local_testing/test_prompt_caching.py | 8 + 7 files changed, 459 insertions(+), 59 deletions(-) diff --git a/docs/my-website/docs/completion/prompt_caching.md b/docs/my-website/docs/completion/prompt_caching.md index 3a5537b52..5c795778e 100644 --- a/docs/my-website/docs/completion/prompt_caching.md +++ b/docs/my-website/docs/completion/prompt_caching.md @@ -1,3 +1,6 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + # Prompt Caching For OpenAI + Anthropic + Deepseek, LiteLLM follows the OpenAI prompt caching usage object format: @@ -30,6 +33,9 @@ For OpenAI + Anthropic + Deepseek, LiteLLM follows the OpenAI prompt caching usa Note: OpenAI caching is only available for prompts containing 1024 tokens or more + + + ```python from litellm import completion import os @@ -87,6 +93,90 @@ assert "prompt_tokens_details" in response.usage assert response.usage.prompt_tokens_details.cached_tokens > 0 ``` + + + +1. Setup config.yaml + +```yaml +model_list: + - model_name: gpt-4o + litellm_params: + model: openai/gpt-4o + api_key: os.environ/OPENAI_API_KEY +``` + +2. Start proxy + +```bash +litellm --config /path/to/config.yaml +``` + +3. Test it! + +```python +from openai import OpenAI +import os + +client = OpenAI( + api_key="LITELLM_PROXY_KEY", # sk-1234 + base_url="LITELLM_PROXY_BASE" # http://0.0.0.0:4000 +) + +for _ in range(2): + response = client.chat.completions.create( + model="gpt-4o", + messages=[ + # System Message + { + "role": "system", + "content": [ + { + "type": "text", + "text": "Here is the full text of a complex legal agreement" + * 400, + } + ], + }, + # marked for caching with the cache_control parameter, so that this checkpoint can read from the previous cache. + { + "role": "user", + "content": [ + { + "type": "text", + "text": "What are the key terms and conditions in this agreement?", + } + ], + }, + { + "role": "assistant", + "content": "Certainly! the key terms and conditions are the following: the contract is 1 year long for $10/mo", + }, + # The final turn is marked with cache-control, for continuing in followups. + { + "role": "user", + "content": [ + { + "type": "text", + "text": "What are the key terms and conditions in this agreement?", + } + ], + }, + ], + temperature=0.2, + max_tokens=10, + ) + +print("response=", response) +print("response.usage=", response.usage) + +assert "prompt_tokens_details" in response.usage +assert response.usage.prompt_tokens_details.cached_tokens > 0 +``` + + + + ### Anthropic Example Anthropic charges for cache writes. @@ -95,6 +185,9 @@ Specify the content to cache with `"cache_control": {"type": "ephemeral"}`. If you pass that in for any other llm provider, it will be ignored. 
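At a glance, `cache_control` is set on an individual content block within a message, not on the request as a whole. A minimal sketch of that shape is below (the repeated text is a placeholder); the full SDK and proxy examples follow.

```python
# Minimal sketch: where cache_control sits inside a message.
# The repeated placeholder text stands in for the real content you want cached.
system_message = {
    "role": "system",
    "content": [
        {
            "type": "text",
            "text": "You are an AI assistant tasked with analyzing legal documents.",
        },
        {
            "type": "text",
            "text": "Here is the full text of a complex legal agreement" * 400,
            "cache_control": {"type": "ephemeral"},  # 👈 marks this block for caching
        },
    ],
}
```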
+ + + ```python from litellm import completion import litellm @@ -129,6 +222,65 @@ response = completion( print(response.usage) ``` + + + +1. Setup config.yaml + +```yaml +model_list: + - model_name: claude-3-5-sonnet-20240620 + litellm_params: + model: anthropic/claude-3-5-sonnet-20240620 + api_key: os.environ/ANTHROPIC_API_KEY +``` + +2. Start proxy + +```bash +litellm --config /path/to/config.yaml +``` + +3. Test it! + +```python +from openai import OpenAI +import os + +client = OpenAI( + api_key="LITELLM_PROXY_KEY", # sk-1234 + base_url="LITELLM_PROXY_BASE" # http://0.0.0.0:4000 +) + +response = client.chat.completions.create( + model="claude-3-5-sonnet-20240620", + messages=[ + { + "role": "system", + "content": [ + { + "type": "text", + "text": "You are an AI assistant tasked with analyzing legal documents.", + }, + { + "type": "text", + "text": "Here is the full text of a complex legal agreement" * 400, + "cache_control": {"type": "ephemeral"}, + }, + ], + }, + { + "role": "user", + "content": "what are the key terms and conditions in this agreement?", + }, + ] +) + +print(response.usage) +``` + + + ### Deepeek Example @@ -196,4 +348,155 @@ response_2 = litellm.completion(model=model_name, messages=message_2) # Add any assertions here to check the response print(response_2.usage) -``` \ No newline at end of file +``` + + +## Calculate Cost + +Cost cache-hit prompt tokens can differ from cache-miss prompt tokens. + +Use the `completion_cost()` function for calculating cost ([handles prompt caching cost calculation](https://github.com/BerriAI/litellm/blob/f7ce1173f3315cc6cae06cf9bcf12e54a2a19705/litellm/llms/anthropic/cost_calculation.py#L12) as well). [**See more helper functions**](./token_usage.md) + +```python +cost = completion_cost(completion_response=response, model=model) +``` + +### Usage + + + + +```python +from litellm import completion, completion_cost +import litellm +import os + +litellm.set_verbose = True # 👈 SEE RAW REQUEST +os.environ["ANTHROPIC_API_KEY"] = "" +model = "anthropic/claude-3-5-sonnet-20240620" +response = completion( + model=model, + messages=[ + { + "role": "system", + "content": [ + { + "type": "text", + "text": "You are an AI assistant tasked with analyzing legal documents.", + }, + { + "type": "text", + "text": "Here is the full text of a complex legal agreement" * 400, + "cache_control": {"type": "ephemeral"}, + }, + ], + }, + { + "role": "user", + "content": "what are the key terms and conditions in this agreement?", + }, + ] +) + +print(response.usage) + +cost = completion_cost(completion_response=response, model=model) + +formatted_string = f"${float(cost):.10f}" +print(formatted_string) +``` + + + +LiteLLM returns the calculated cost in the response headers - `x-litellm-response-cost` + +```python +from openai import OpenAI + +client = OpenAI( + api_key="LITELLM_PROXY_KEY", # sk-1234.. 
+ base_url="LITELLM_PROXY_BASE" # http://0.0.0.0:4000 +) +response = client.chat.completions.with_raw_response.create( + messages=[{ + "role": "user", + "content": "Say this is a test", + }], + model="gpt-3.5-turbo", +) +print(response.headers.get('x-litellm-response-cost')) + +completion = response.parse() # get the object that `chat.completions.create()` would have returned +print(completion) +``` + + + + +## Check Model Support + +Check if a model supports prompt caching with `supports_prompt_caching()` + + + + +```python +from litellm.utils import supports_prompt_caching + +supports_pc: bool = supports_prompt_caching(model="anthropic/claude-3-5-sonnet-20240620") + +assert supports_pc +``` + + + + +Use the `/model/info` endpoint to check if a model on the proxy supports prompt caching + +1. Setup config.yaml + +```yaml +model_list: + - model_name: claude-3-5-sonnet-20240620 + litellm_params: + model: anthropic/claude-3-5-sonnet-20240620 + api_key: os.environ/ANTHROPIC_API_KEY +``` + +2. Start proxy + +```bash +litellm --config /path/to/config.yaml +``` + +3. Test it! + +```bash +curl -L -X GET 'http://0.0.0.0:4000/v1/model/info' \ +-H 'Authorization: Bearer sk-1234' \ +``` + +**Expected Response** + +```bash +{ + "data": [ + { + "model_name": "claude-3-5-sonnet-20240620", + "litellm_params": { + "model": "anthropic/claude-3-5-sonnet-20240620" + }, + "model_info": { + "key": "claude-3-5-sonnet-20240620", + ... + "supports_prompt_caching": true # 👈 LOOK FOR THIS! + } + } + ] +} +``` + + + + +This checks our maintained [model info/cost map](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json) \ No newline at end of file diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 0fe6f6b87..7d060dd83 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -9,7 +9,8 @@ "mode": "one of chat, embedding, completion, image_generation, audio_transcription, audio_speech", "supports_function_calling": true, "supports_parallel_function_calling": true, - "supports_vision": true + "supports_vision": true, + "supports_prompt_caching": true }, "gpt-4": { "max_tokens": 4096, @@ -19,7 +20,8 @@ "output_cost_per_token": 0.00006, "litellm_provider": "openai", "mode": "chat", - "supports_function_calling": true + "supports_function_calling": true, + "supports_prompt_caching": true }, "gpt-4o": { "max_tokens": 4096, @@ -129,7 +131,8 @@ "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, - "supports_vision": true + "supports_vision": true, + "supports_prompt_caching": true }, "gpt-4o-2024-05-13": { "max_tokens": 4096, @@ -141,7 +144,8 @@ "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, - "supports_vision": true + "supports_vision": true, + "supports_prompt_caching": true }, "gpt-4o-2024-08-06": { "max_tokens": 16384, @@ -166,7 +170,8 @@ "litellm_provider": "openai", "mode": "chat", "supports_function_calling": true, - "supports_parallel_function_calling": true + "supports_parallel_function_calling": true, + "supports_prompt_caching": true }, "gpt-4-0314": { "max_tokens": 4096, @@ -175,7 +180,8 @@ "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006, "litellm_provider": "openai", - "mode": "chat" + "mode": "chat", + "supports_prompt_caching": true }, "gpt-4-0613": { "max_tokens": 4096, @@ -185,7 +191,8 @@ "output_cost_per_token": 0.00006, 
"litellm_provider": "openai", "mode": "chat", - "supports_function_calling": true + "supports_function_calling": true, + "supports_prompt_caching": true }, "gpt-4-32k": { "max_tokens": 4096, @@ -194,7 +201,8 @@ "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012, "litellm_provider": "openai", - "mode": "chat" + "mode": "chat", + "supports_prompt_caching": true }, "gpt-4-32k-0314": { "max_tokens": 4096, @@ -203,7 +211,8 @@ "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012, "litellm_provider": "openai", - "mode": "chat" + "mode": "chat", + "supports_prompt_caching": true }, "gpt-4-32k-0613": { "max_tokens": 4096, @@ -212,7 +221,8 @@ "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012, "litellm_provider": "openai", - "mode": "chat" + "mode": "chat", + "supports_prompt_caching": true }, "gpt-4-turbo": { "max_tokens": 4096, @@ -224,7 +234,8 @@ "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, - "supports_vision": true + "supports_vision": true, + "supports_prompt_caching": true }, "gpt-4-turbo-2024-04-09": { "max_tokens": 4096, @@ -236,7 +247,8 @@ "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, - "supports_vision": true + "supports_vision": true, + "supports_prompt_caching": true }, "gpt-4-1106-preview": { "max_tokens": 4096, @@ -247,7 +259,8 @@ "litellm_provider": "openai", "mode": "chat", "supports_function_calling": true, - "supports_parallel_function_calling": true + "supports_parallel_function_calling": true, + "supports_prompt_caching": true }, "gpt-4-0125-preview": { "max_tokens": 4096, @@ -258,7 +271,8 @@ "litellm_provider": "openai", "mode": "chat", "supports_function_calling": true, - "supports_parallel_function_calling": true + "supports_parallel_function_calling": true, + "supports_prompt_caching": true }, "gpt-4-vision-preview": { "max_tokens": 4096, @@ -268,7 +282,8 @@ "output_cost_per_token": 0.00003, "litellm_provider": "openai", "mode": "chat", - "supports_vision": true + "supports_vision": true, + "supports_prompt_caching": true }, "gpt-4-1106-vision-preview": { "max_tokens": 4096, @@ -278,7 +293,8 @@ "output_cost_per_token": 0.00003, "litellm_provider": "openai", "mode": "chat", - "supports_vision": true + "supports_vision": true, + "supports_prompt_caching": true }, "gpt-3.5-turbo": { "max_tokens": 4097, @@ -288,7 +304,8 @@ "output_cost_per_token": 0.000002, "litellm_provider": "openai", "mode": "chat", - "supports_function_calling": true + "supports_function_calling": true, + "supports_prompt_caching": true }, "gpt-3.5-turbo-0301": { "max_tokens": 4097, @@ -297,7 +314,8 @@ "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002, "litellm_provider": "openai", - "mode": "chat" + "mode": "chat", + "supports_prompt_caching": true }, "gpt-3.5-turbo-0613": { "max_tokens": 4097, @@ -307,7 +325,8 @@ "output_cost_per_token": 0.000002, "litellm_provider": "openai", "mode": "chat", - "supports_function_calling": true + "supports_function_calling": true, + "supports_prompt_caching": true }, "gpt-3.5-turbo-1106": { "max_tokens": 16385, @@ -318,7 +337,8 @@ "litellm_provider": "openai", "mode": "chat", "supports_function_calling": true, - "supports_parallel_function_calling": true + "supports_parallel_function_calling": true, + "supports_prompt_caching": true }, "gpt-3.5-turbo-0125": { "max_tokens": 16385, @@ -329,7 +349,8 @@ "litellm_provider": "openai", "mode": "chat", "supports_function_calling": true, - 
"supports_parallel_function_calling": true + "supports_parallel_function_calling": true, + "supports_prompt_caching": true }, "gpt-3.5-turbo-16k": { "max_tokens": 16385, @@ -338,7 +359,8 @@ "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004, "litellm_provider": "openai", - "mode": "chat" + "mode": "chat", + "supports_prompt_caching": true }, "gpt-3.5-turbo-16k-0613": { "max_tokens": 16385, @@ -347,7 +369,8 @@ "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004, "litellm_provider": "openai", - "mode": "chat" + "mode": "chat", + "supports_prompt_caching": true }, "ft:gpt-3.5-turbo": { "max_tokens": 4096, diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml index 2c910865a..e2afa3f12 100644 --- a/litellm/proxy/_new_secret_config.yaml +++ b/litellm/proxy/_new_secret_config.yaml @@ -1,9 +1,5 @@ model_list: - - model_name: fake-openai-endpoint - litellm_params: - model: openai/fake - api_key: fake-key - api_base: https://exampleopenaiendpoint-production.up.railway.app/ - -litellm_settings: - callbacks: ["gcs_bucket"] + - model_name: claude-3-5-sonnet-20240620 + litellm_params: + model: anthropic/claude-3-5-sonnet-20240620 + api_key: os.environ/ANTHROPIC_API_KEY \ No newline at end of file diff --git a/litellm/utils.py b/litellm/utils.py index 753e07f80..cc52c1f56 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -2179,6 +2179,40 @@ def supports_function_calling( ) +def supports_prompt_caching( + model: str, custom_llm_provider: Optional[str] = None +) -> bool: + """ + Check if the given model supports prompt caching and return a boolean value. + + Parameters: + model (str): The model name to be checked. + custom_llm_provider (Optional[str]): The provider to be checked. + + Returns: + bool: True if the model supports prompt caching, False otherwise. + + Raises: + Exception: If the given model is not found or there's an error in retrieval. + """ + try: + model, custom_llm_provider, _, _ = litellm.get_llm_provider( + model=model, custom_llm_provider=custom_llm_provider + ) + + model_info = litellm.get_model_info( + model=model, custom_llm_provider=custom_llm_provider + ) + + if model_info.get("supports_prompt_caching", False) is True: + return True + return False + except Exception as e: + raise Exception( + f"Model not found or error in checking prompt caching support. You passed model={model}, custom_llm_provider={custom_llm_provider}. Error: {str(e)}" + ) + + def supports_vision(model: str, custom_llm_provider: Optional[str] = None) -> bool: """ Check if the given model supports vision and return a boolean value. 
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 0fe6f6b87..7d060dd83 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -9,7 +9,8 @@ "mode": "one of chat, embedding, completion, image_generation, audio_transcription, audio_speech", "supports_function_calling": true, "supports_parallel_function_calling": true, - "supports_vision": true + "supports_vision": true, + "supports_prompt_caching": true }, "gpt-4": { "max_tokens": 4096, @@ -19,7 +20,8 @@ "output_cost_per_token": 0.00006, "litellm_provider": "openai", "mode": "chat", - "supports_function_calling": true + "supports_function_calling": true, + "supports_prompt_caching": true }, "gpt-4o": { "max_tokens": 4096, @@ -129,7 +131,8 @@ "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, - "supports_vision": true + "supports_vision": true, + "supports_prompt_caching": true }, "gpt-4o-2024-05-13": { "max_tokens": 4096, @@ -141,7 +144,8 @@ "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, - "supports_vision": true + "supports_vision": true, + "supports_prompt_caching": true }, "gpt-4o-2024-08-06": { "max_tokens": 16384, @@ -166,7 +170,8 @@ "litellm_provider": "openai", "mode": "chat", "supports_function_calling": true, - "supports_parallel_function_calling": true + "supports_parallel_function_calling": true, + "supports_prompt_caching": true }, "gpt-4-0314": { "max_tokens": 4096, @@ -175,7 +180,8 @@ "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006, "litellm_provider": "openai", - "mode": "chat" + "mode": "chat", + "supports_prompt_caching": true }, "gpt-4-0613": { "max_tokens": 4096, @@ -185,7 +191,8 @@ "output_cost_per_token": 0.00006, "litellm_provider": "openai", "mode": "chat", - "supports_function_calling": true + "supports_function_calling": true, + "supports_prompt_caching": true }, "gpt-4-32k": { "max_tokens": 4096, @@ -194,7 +201,8 @@ "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012, "litellm_provider": "openai", - "mode": "chat" + "mode": "chat", + "supports_prompt_caching": true }, "gpt-4-32k-0314": { "max_tokens": 4096, @@ -203,7 +211,8 @@ "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012, "litellm_provider": "openai", - "mode": "chat" + "mode": "chat", + "supports_prompt_caching": true }, "gpt-4-32k-0613": { "max_tokens": 4096, @@ -212,7 +221,8 @@ "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012, "litellm_provider": "openai", - "mode": "chat" + "mode": "chat", + "supports_prompt_caching": true }, "gpt-4-turbo": { "max_tokens": 4096, @@ -224,7 +234,8 @@ "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, - "supports_vision": true + "supports_vision": true, + "supports_prompt_caching": true }, "gpt-4-turbo-2024-04-09": { "max_tokens": 4096, @@ -236,7 +247,8 @@ "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, - "supports_vision": true + "supports_vision": true, + "supports_prompt_caching": true }, "gpt-4-1106-preview": { "max_tokens": 4096, @@ -247,7 +259,8 @@ "litellm_provider": "openai", "mode": "chat", "supports_function_calling": true, - "supports_parallel_function_calling": true + "supports_parallel_function_calling": true, + "supports_prompt_caching": true }, "gpt-4-0125-preview": { "max_tokens": 4096, @@ -258,7 +271,8 @@ "litellm_provider": "openai", "mode": "chat", "supports_function_calling": 
true, - "supports_parallel_function_calling": true + "supports_parallel_function_calling": true, + "supports_prompt_caching": true }, "gpt-4-vision-preview": { "max_tokens": 4096, @@ -268,7 +282,8 @@ "output_cost_per_token": 0.00003, "litellm_provider": "openai", "mode": "chat", - "supports_vision": true + "supports_vision": true, + "supports_prompt_caching": true }, "gpt-4-1106-vision-preview": { "max_tokens": 4096, @@ -278,7 +293,8 @@ "output_cost_per_token": 0.00003, "litellm_provider": "openai", "mode": "chat", - "supports_vision": true + "supports_vision": true, + "supports_prompt_caching": true }, "gpt-3.5-turbo": { "max_tokens": 4097, @@ -288,7 +304,8 @@ "output_cost_per_token": 0.000002, "litellm_provider": "openai", "mode": "chat", - "supports_function_calling": true + "supports_function_calling": true, + "supports_prompt_caching": true }, "gpt-3.5-turbo-0301": { "max_tokens": 4097, @@ -297,7 +314,8 @@ "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002, "litellm_provider": "openai", - "mode": "chat" + "mode": "chat", + "supports_prompt_caching": true }, "gpt-3.5-turbo-0613": { "max_tokens": 4097, @@ -307,7 +325,8 @@ "output_cost_per_token": 0.000002, "litellm_provider": "openai", "mode": "chat", - "supports_function_calling": true + "supports_function_calling": true, + "supports_prompt_caching": true }, "gpt-3.5-turbo-1106": { "max_tokens": 16385, @@ -318,7 +337,8 @@ "litellm_provider": "openai", "mode": "chat", "supports_function_calling": true, - "supports_parallel_function_calling": true + "supports_parallel_function_calling": true, + "supports_prompt_caching": true }, "gpt-3.5-turbo-0125": { "max_tokens": 16385, @@ -329,7 +349,8 @@ "litellm_provider": "openai", "mode": "chat", "supports_function_calling": true, - "supports_parallel_function_calling": true + "supports_parallel_function_calling": true, + "supports_prompt_caching": true }, "gpt-3.5-turbo-16k": { "max_tokens": 16385, @@ -338,7 +359,8 @@ "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004, "litellm_provider": "openai", - "mode": "chat" + "mode": "chat", + "supports_prompt_caching": true }, "gpt-3.5-turbo-16k-0613": { "max_tokens": 16385, @@ -347,7 +369,8 @@ "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004, "litellm_provider": "openai", - "mode": "chat" + "mode": "chat", + "supports_prompt_caching": true }, "ft:gpt-3.5-turbo": { "max_tokens": 4096, diff --git a/tests/local_testing/test_get_model_file.py b/tests/local_testing/test_get_model_file.py index 949ff43b8..17bd2d7ce 100644 --- a/tests/local_testing/test_get_model_file.py +++ b/tests/local_testing/test_get_model_file.py @@ -1,4 +1,6 @@ import os, sys, traceback +import importlib.resources +import json sys.path.insert( 0, os.path.abspath("../..") @@ -6,7 +8,18 @@ sys.path.insert( import litellm import pytest -try: - print(litellm.get_model_cost_map(url="fake-url")) -except Exception as e: - pytest.fail(f"An exception occurred: {e}") + +def test_get_model_cost_map(): + try: + print(litellm.get_model_cost_map(url="fake-url")) + except Exception as e: + pytest.fail(f"An exception occurred: {e}") + + +def test_get_backup_model_cost_map(): + with importlib.resources.open_text( + "litellm", "model_prices_and_context_window_backup.json" + ) as f: + print("inside backup") + content = json.load(f) + print("content", content) diff --git a/tests/local_testing/test_prompt_caching.py b/tests/local_testing/test_prompt_caching.py index 301ead3aa..35d5e2588 100644 --- a/tests/local_testing/test_prompt_caching.py +++ 
b/tests/local_testing/test_prompt_caching.py
@@ -111,3 +111,11 @@ def test_prompt_caching_model(model):
     # assert (response.usage.cache_read_input_tokens > 0) or (
     #     response.usage.cache_creation_input_tokens > 0
     # )
+
+
+def test_supports_prompt_caching():
+    from litellm.utils import supports_prompt_caching
+
+    supports_pc = supports_prompt_caching(model="anthropic/claude-3-5-sonnet-20240620")
+
+    assert supports_pc
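A possible companion test (a sketch, not included in this patch) that verifies the same flag through `litellm.get_model_info()`, which the new helper calls internally; the test name is illustrative.

```python
# Sketch (not part of this patch): check the "supports_prompt_caching" flag directly
# via litellm.get_model_info(), mirroring the call supports_prompt_caching() makes internally.
import litellm


def test_model_info_reports_prompt_caching():
    model_info = litellm.get_model_info(
        model="claude-3-5-sonnet-20240620", custom_llm_provider="anthropic"
    )

    assert model_info.get("supports_prompt_caching", False) is True
```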