forked from phoenix/litellm-mirror
LiteLLM Minor Fixes & Improvements (10/05/2024) (#6083)
* docs(prompt_caching.md): add prompt caching cost calc example to docs
* docs(prompt_caching.md): add proxy examples to docs
* feat(utils.py): expose new helper `supports_prompt_caching()` to check if a model supports prompt caching
* docs(prompt_caching.md): add docs on checking model support for prompt caching
* build: fix invalid json
This commit is contained in:
parent fac3b2ee42
commit f2c0a31e3c
7 changed files with 459 additions and 59 deletions
@@ -1,3 +1,6 @@
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

# Prompt Caching

For OpenAI + Anthropic + Deepseek, LiteLLM follows the OpenAI prompt caching usage object format:
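As a minimal sketch of reading that usage object off a LiteLLM response (the field names match the assertions later on this page; the model and prompt are illustrative):

```python
# Sketch: cached prompt tokens are reported under usage.prompt_tokens_details,
# following the OpenAI usage object format. Assumes OPENAI_API_KEY is set.
from litellm import completion

response = completion(
    model="gpt-4o",
    messages=[{"role": "user", "content": "long, repeated prompt goes here ..."}],
)

print(response.usage.prompt_tokens)                        # total prompt tokens billed
print(response.usage.prompt_tokens_details.cached_tokens)  # portion read from the prompt cache
```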
@@ -30,6 +33,9 @@ For OpenAI + Anthropic + Deepseek, LiteLLM follows the OpenAI prompt caching usa

Note: OpenAI caching is only available for prompts containing 1024 tokens or more

<Tabs>
<TabItem value="sdk" label="SDK">

```python
from litellm import completion
import os

@@ -87,6 +93,90 @@ assert "prompt_tokens_details" in response.usage
assert response.usage.prompt_tokens_details.cached_tokens > 0
```

</TabItem>
<TabItem value="proxy" label="PROXY">

1. Setup config.yaml

```yaml
model_list:
  - model_name: gpt-4o
    litellm_params:
      model: openai/gpt-4o
      api_key: os.environ/OPENAI_API_KEY
```

2. Start proxy

```bash
litellm --config /path/to/config.yaml
```

3. Test it!

```python
from openai import OpenAI
import os

client = OpenAI(
    api_key="LITELLM_PROXY_KEY", # sk-1234
    base_url="LITELLM_PROXY_BASE" # http://0.0.0.0:4000
)

for _ in range(2):
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            # System Message
            {
                "role": "system",
                "content": [
                    {
                        "type": "text",
                        "text": "Here is the full text of a complex legal agreement" * 400,
                    }
                ],
            },
            # The long system prompt above is the cached prefix - OpenAI caches prompts
            # of 1024+ tokens automatically, so no extra parameter is needed.
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "What are the key terms and conditions in this agreement?",
                    }
                ],
            },
            {
                "role": "assistant",
                "content": "Certainly! The key terms and conditions are the following: the contract is 1 year long for $10/mo",
            },
            # The final turn repeats the same prefix, so follow-ups keep reading from the cache.
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "What are the key terms and conditions in this agreement?",
                    }
                ],
            },
        ],
        temperature=0.2,
        max_tokens=10,
    )

print("response=", response)
print("response.usage=", response.usage)

assert "prompt_tokens_details" in response.usage
assert response.usage.prompt_tokens_details.cached_tokens > 0
```

</TabItem>
</Tabs>

### Anthropic Example

Anthropic charges for cache writes.

@@ -95,6 +185,9 @@ Specify the content to cache with "cache_control": {"type": "ephemeral"}.

If you pass that in for any other LLM provider, it will be ignored.

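The SDK example in the next hunk is truncated by the diff, so here is a minimal sketch of where `cache_control` goes; it mirrors the full cost-calculation example further down this page and assumes `ANTHROPIC_API_KEY` is set:

```python
# Sketch: mark the large system block as cacheable; Anthropic bills the first
# request as a cache write and later identical prefixes as cheaper cache reads.
from litellm import completion

response = completion(
    model="anthropic/claude-3-5-sonnet-20240620",
    messages=[
        {
            "role": "system",
            "content": [
                {
                    "type": "text",
                    "text": "Here is the full text of a complex legal agreement" * 400,
                    "cache_control": {"type": "ephemeral"},  # 👈 cache this block
                },
            ],
        },
        {"role": "user", "content": "what are the key terms and conditions in this agreement?"},
    ],
)

print(response.usage)
```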
<Tabs>
<TabItem value="sdk" label="SDK">

```python
from litellm import completion
import litellm

@@ -129,6 +222,65 @@ response = completion(

print(response.usage)
```
</TabItem>
<TabItem value="proxy" label="PROXY">

1. Setup config.yaml

```yaml
model_list:
  - model_name: claude-3-5-sonnet-20240620
    litellm_params:
      model: anthropic/claude-3-5-sonnet-20240620
      api_key: os.environ/ANTHROPIC_API_KEY
```

2. Start proxy

```bash
litellm --config /path/to/config.yaml
```

3. Test it!

```python
from openai import OpenAI
import os

client = OpenAI(
    api_key="LITELLM_PROXY_KEY", # sk-1234
    base_url="LITELLM_PROXY_BASE" # http://0.0.0.0:4000
)

response = client.chat.completions.create(
    model="claude-3-5-sonnet-20240620",
    messages=[
        {
            "role": "system",
            "content": [
                {
                    "type": "text",
                    "text": "You are an AI assistant tasked with analyzing legal documents.",
                },
                {
                    "type": "text",
                    "text": "Here is the full text of a complex legal agreement" * 400,
                    "cache_control": {"type": "ephemeral"},
                },
            ],
        },
        {
            "role": "user",
            "content": "what are the key terms and conditions in this agreement?",
        },
    ]
)

print(response.usage)
```

</TabItem>
</Tabs>

### Deepseek Example

@@ -196,4 +348,155 @@ response_2 = litellm.completion(model=model_name, messages=message_2)

# Add any assertions here to check the response
print(response_2.usage)
```
```

## Calculate Cost

The cost of cache-hit prompt tokens can differ from the cost of cache-miss prompt tokens.

Use the `completion_cost()` function for calculating cost ([handles prompt caching cost calculation](https://github.com/BerriAI/litellm/blob/f7ce1173f3315cc6cae06cf9bcf12e54a2a19705/litellm/llms/anthropic/cost_calculation.py#L12) as well). [**See more helper functions**](./token_usage.md)

```python
cost = completion_cost(completion_response=response, model=model)
```

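For intuition only, the cache-aware arithmetic looks roughly like the sketch below. `input_cost_per_token` and `output_cost_per_token` are cost-map fields that appear in the JSON diff further down; `cache_read_input_token_cost` is an assumed field name here, so treat this as illustrative and rely on `completion_cost()` for real numbers:

```python
# Illustrative sketch only -- completion_cost() is the supported way to do this.
def rough_prompt_caching_cost(usage, prices: dict) -> float:
    cached = usage.prompt_tokens_details.cached_tokens
    uncached = usage.prompt_tokens - cached
    # cache-read tokens are typically billed at a discounted per-token rate
    cache_read_rate = prices.get("cache_read_input_token_cost", prices["input_cost_per_token"])
    return (
        uncached * prices["input_cost_per_token"]
        + cached * cache_read_rate
        + usage.completion_tokens * prices["output_cost_per_token"]
    )
```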
### Usage

<Tabs>
<TabItem value="sdk" label="SDK">

```python
from litellm import completion, completion_cost
import litellm
import os

litellm.set_verbose = True # 👈 SEE RAW REQUEST
os.environ["ANTHROPIC_API_KEY"] = ""
model = "anthropic/claude-3-5-sonnet-20240620"
response = completion(
    model=model,
    messages=[
        {
            "role": "system",
            "content": [
                {
                    "type": "text",
                    "text": "You are an AI assistant tasked with analyzing legal documents.",
                },
                {
                    "type": "text",
                    "text": "Here is the full text of a complex legal agreement" * 400,
                    "cache_control": {"type": "ephemeral"},
                },
            ],
        },
        {
            "role": "user",
            "content": "what are the key terms and conditions in this agreement?",
        },
    ]
)

print(response.usage)

cost = completion_cost(completion_response=response, model=model)

formatted_string = f"${float(cost):.10f}"
print(formatted_string)
```
</TabItem>
<TabItem value="proxy" label="PROXY">

LiteLLM returns the calculated cost in the response headers - `x-litellm-response-cost`

```python
from openai import OpenAI

client = OpenAI(
    api_key="LITELLM_PROXY_KEY", # sk-1234..
    base_url="LITELLM_PROXY_BASE" # http://0.0.0.0:4000
)

response = client.chat.completions.with_raw_response.create(
    messages=[{
        "role": "user",
        "content": "Say this is a test",
    }],
    model="gpt-3.5-turbo",
)
print(response.headers.get('x-litellm-response-cost'))

completion = response.parse() # get the object that `chat.completions.create()` would have returned
print(completion)
```

</TabItem>
</Tabs>

## Check Model Support

Check if a model supports prompt caching with `supports_prompt_caching()`

<Tabs>
<TabItem value="sdk" label="SDK">

```python
from litellm.utils import supports_prompt_caching

supports_pc: bool = supports_prompt_caching(model="anthropic/claude-3-5-sonnet-20240620")

assert supports_pc
```

</TabItem>
<TabItem value="proxy" label="PROXY">

Use the `/model/info` endpoint to check if a model on the proxy supports prompt caching

1. Setup config.yaml

```yaml
model_list:
  - model_name: claude-3-5-sonnet-20240620
    litellm_params:
      model: anthropic/claude-3-5-sonnet-20240620
      api_key: os.environ/ANTHROPIC_API_KEY
```

2. Start proxy

```bash
litellm --config /path/to/config.yaml
```

3. Test it!

```bash
curl -L -X GET 'http://0.0.0.0:4000/v1/model/info' \
-H 'Authorization: Bearer sk-1234'
```

**Expected Response**

```bash
{
    "data": [
        {
            "model_name": "claude-3-5-sonnet-20240620",
            "litellm_params": {
                "model": "anthropic/claude-3-5-sonnet-20240620"
            },
            "model_info": {
                "key": "claude-3-5-sonnet-20240620",
                ...
                "supports_prompt_caching": true # 👈 LOOK FOR THIS!
            }
        }
    ]
}
```

</TabItem>
</Tabs>

This checks our maintained [model info/cost map](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json)

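To read that flag from the map in code, a small sketch using `litellm.get_model_info()` (the same helper the new `supports_prompt_caching()` in `utils.py` below is built on):

```python
# Sketch: look up the prompt-caching flag straight from LiteLLM's model info map.
import litellm

info = litellm.get_model_info(
    model="claude-3-5-sonnet-20240620", custom_llm_provider="anthropic"
)
print(info.get("supports_prompt_caching", False))  # True when the cost map marks support
```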
@@ -9,7 +9,8 @@
"mode": "one of chat, embedding, completion, image_generation, audio_transcription, audio_speech",
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_vision": true
"supports_vision": true,
"supports_prompt_caching": true
},
"gpt-4": {
"max_tokens": 4096,

@@ -19,7 +20,8 @@
"output_cost_per_token": 0.00006,
"litellm_provider": "openai",
"mode": "chat",
"supports_function_calling": true
"supports_function_calling": true,
"supports_prompt_caching": true
},
"gpt-4o": {
"max_tokens": 4096,

@@ -129,7 +131,8 @@
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_vision": true
"supports_vision": true,
"supports_prompt_caching": true
},
"gpt-4o-2024-05-13": {
"max_tokens": 4096,

@@ -141,7 +144,8 @@
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_vision": true
"supports_vision": true,
"supports_prompt_caching": true
},
"gpt-4o-2024-08-06": {
"max_tokens": 16384,

@@ -166,7 +170,8 @@
"litellm_provider": "openai",
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
"supports_parallel_function_calling": true,
"supports_prompt_caching": true
},
"gpt-4-0314": {
"max_tokens": 4096,

@@ -175,7 +180,8 @@
"input_cost_per_token": 0.00003,
"output_cost_per_token": 0.00006,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_prompt_caching": true
},
"gpt-4-0613": {
"max_tokens": 4096,

@@ -185,7 +191,8 @@
"output_cost_per_token": 0.00006,
"litellm_provider": "openai",
"mode": "chat",
"supports_function_calling": true
"supports_function_calling": true,
"supports_prompt_caching": true
},
"gpt-4-32k": {
"max_tokens": 4096,

@@ -194,7 +201,8 @@
"input_cost_per_token": 0.00006,
"output_cost_per_token": 0.00012,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_prompt_caching": true
},
"gpt-4-32k-0314": {
"max_tokens": 4096,

@@ -203,7 +211,8 @@
"input_cost_per_token": 0.00006,
"output_cost_per_token": 0.00012,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_prompt_caching": true
},
"gpt-4-32k-0613": {
"max_tokens": 4096,

@@ -212,7 +221,8 @@
"input_cost_per_token": 0.00006,
"output_cost_per_token": 0.00012,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_prompt_caching": true
},
"gpt-4-turbo": {
"max_tokens": 4096,

@@ -224,7 +234,8 @@
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_vision": true
"supports_vision": true,
"supports_prompt_caching": true
},
"gpt-4-turbo-2024-04-09": {
"max_tokens": 4096,

@@ -236,7 +247,8 @@
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_vision": true
"supports_vision": true,
"supports_prompt_caching": true
},
"gpt-4-1106-preview": {
"max_tokens": 4096,

@@ -247,7 +259,8 @@
"litellm_provider": "openai",
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
"supports_parallel_function_calling": true,
"supports_prompt_caching": true
},
"gpt-4-0125-preview": {
"max_tokens": 4096,

@@ -258,7 +271,8 @@
"litellm_provider": "openai",
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
"supports_parallel_function_calling": true,
"supports_prompt_caching": true
},
"gpt-4-vision-preview": {
"max_tokens": 4096,

@@ -268,7 +282,8 @@
"output_cost_per_token": 0.00003,
"litellm_provider": "openai",
"mode": "chat",
"supports_vision": true
"supports_vision": true,
"supports_prompt_caching": true
},
"gpt-4-1106-vision-preview": {
"max_tokens": 4096,

@@ -278,7 +293,8 @@
"output_cost_per_token": 0.00003,
"litellm_provider": "openai",
"mode": "chat",
"supports_vision": true
"supports_vision": true,
"supports_prompt_caching": true
},
"gpt-3.5-turbo": {
"max_tokens": 4097,

@@ -288,7 +304,8 @@
"output_cost_per_token": 0.000002,
"litellm_provider": "openai",
"mode": "chat",
"supports_function_calling": true
"supports_function_calling": true,
"supports_prompt_caching": true
},
"gpt-3.5-turbo-0301": {
"max_tokens": 4097,

@@ -297,7 +314,8 @@
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_prompt_caching": true
},
"gpt-3.5-turbo-0613": {
"max_tokens": 4097,

@@ -307,7 +325,8 @@
"output_cost_per_token": 0.000002,
"litellm_provider": "openai",
"mode": "chat",
"supports_function_calling": true
"supports_function_calling": true,
"supports_prompt_caching": true
},
"gpt-3.5-turbo-1106": {
"max_tokens": 16385,

@@ -318,7 +337,8 @@
"litellm_provider": "openai",
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
"supports_parallel_function_calling": true,
"supports_prompt_caching": true
},
"gpt-3.5-turbo-0125": {
"max_tokens": 16385,

@@ -329,7 +349,8 @@
"litellm_provider": "openai",
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
"supports_parallel_function_calling": true,
"supports_prompt_caching": true
},
"gpt-3.5-turbo-16k": {
"max_tokens": 16385,

@@ -338,7 +359,8 @@
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000004,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_prompt_caching": true
},
"gpt-3.5-turbo-16k-0613": {
"max_tokens": 16385,

@@ -347,7 +369,8 @@
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000004,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_prompt_caching": true
},
"ft:gpt-3.5-turbo": {
"max_tokens": 4096,

@@ -1,9 +1,5 @@
model_list:
  - model_name: fake-openai-endpoint
    litellm_params:
      model: openai/fake
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/

litellm_settings:
  callbacks: ["gcs_bucket"]
  - model_name: claude-3-5-sonnet-20240620
    litellm_params:
      model: anthropic/claude-3-5-sonnet-20240620
      api_key: os.environ/ANTHROPIC_API_KEY

@@ -2179,6 +2179,40 @@ def supports_function_calling(
    )


def supports_prompt_caching(
    model: str, custom_llm_provider: Optional[str] = None
) -> bool:
    """
    Check if the given model supports prompt caching and return a boolean value.

    Parameters:
    model (str): The model name to be checked.
    custom_llm_provider (Optional[str]): The provider to be checked.

    Returns:
    bool: True if the model supports prompt caching, False otherwise.

    Raises:
    Exception: If the given model is not found or there's an error in retrieval.
    """
    try:
        model, custom_llm_provider, _, _ = litellm.get_llm_provider(
            model=model, custom_llm_provider=custom_llm_provider
        )

        model_info = litellm.get_model_info(
            model=model, custom_llm_provider=custom_llm_provider
        )

        if model_info.get("supports_prompt_caching", False) is True:
            return True
        return False
    except Exception as e:
        raise Exception(
            f"Model not found or error in checking prompt caching support. You passed model={model}, custom_llm_provider={custom_llm_provider}. Error: {str(e)}"
        )


def supports_vision(model: str, custom_llm_provider: Optional[str] = None) -> bool:
    """
    Check if the given model supports vision and return a boolean value.

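A quick usage sketch of the helper added above; both call forms follow its signature, and the model/provider names are the ones used elsewhere in this commit:

```python
# Sketch: the provider can be inferred from the prefixed model name or passed explicitly.
from litellm.utils import supports_prompt_caching

assert supports_prompt_caching(model="anthropic/claude-3-5-sonnet-20240620")
assert supports_prompt_caching(
    model="claude-3-5-sonnet-20240620", custom_llm_provider="anthropic"
)
```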
@@ -9,7 +9,8 @@
"mode": "one of chat, embedding, completion, image_generation, audio_transcription, audio_speech",
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_vision": true
"supports_vision": true,
"supports_prompt_caching": true
},
"gpt-4": {
"max_tokens": 4096,

@@ -19,7 +20,8 @@
"output_cost_per_token": 0.00006,
"litellm_provider": "openai",
"mode": "chat",
"supports_function_calling": true
"supports_function_calling": true,
"supports_prompt_caching": true
},
"gpt-4o": {
"max_tokens": 4096,

@@ -129,7 +131,8 @@
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_vision": true
"supports_vision": true,
"supports_prompt_caching": true
},
"gpt-4o-2024-05-13": {
"max_tokens": 4096,

@@ -141,7 +144,8 @@
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_vision": true
"supports_vision": true,
"supports_prompt_caching": true
},
"gpt-4o-2024-08-06": {
"max_tokens": 16384,

@@ -166,7 +170,8 @@
"litellm_provider": "openai",
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
"supports_parallel_function_calling": true,
"supports_prompt_caching": true
},
"gpt-4-0314": {
"max_tokens": 4096,

@@ -175,7 +180,8 @@
"input_cost_per_token": 0.00003,
"output_cost_per_token": 0.00006,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_prompt_caching": true
},
"gpt-4-0613": {
"max_tokens": 4096,

@@ -185,7 +191,8 @@
"output_cost_per_token": 0.00006,
"litellm_provider": "openai",
"mode": "chat",
"supports_function_calling": true
"supports_function_calling": true,
"supports_prompt_caching": true
},
"gpt-4-32k": {
"max_tokens": 4096,

@@ -194,7 +201,8 @@
"input_cost_per_token": 0.00006,
"output_cost_per_token": 0.00012,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_prompt_caching": true
},
"gpt-4-32k-0314": {
"max_tokens": 4096,

@@ -203,7 +211,8 @@
"input_cost_per_token": 0.00006,
"output_cost_per_token": 0.00012,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_prompt_caching": true
},
"gpt-4-32k-0613": {
"max_tokens": 4096,

@@ -212,7 +221,8 @@
"input_cost_per_token": 0.00006,
"output_cost_per_token": 0.00012,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_prompt_caching": true
},
"gpt-4-turbo": {
"max_tokens": 4096,

@@ -224,7 +234,8 @@
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_vision": true
"supports_vision": true,
"supports_prompt_caching": true
},
"gpt-4-turbo-2024-04-09": {
"max_tokens": 4096,

@@ -236,7 +247,8 @@
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_vision": true
"supports_vision": true,
"supports_prompt_caching": true
},
"gpt-4-1106-preview": {
"max_tokens": 4096,

@@ -247,7 +259,8 @@
"litellm_provider": "openai",
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
"supports_parallel_function_calling": true,
"supports_prompt_caching": true
},
"gpt-4-0125-preview": {
"max_tokens": 4096,

@@ -258,7 +271,8 @@
"litellm_provider": "openai",
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
"supports_parallel_function_calling": true,
"supports_prompt_caching": true
},
"gpt-4-vision-preview": {
"max_tokens": 4096,

@@ -268,7 +282,8 @@
"output_cost_per_token": 0.00003,
"litellm_provider": "openai",
"mode": "chat",
"supports_vision": true
"supports_vision": true,
"supports_prompt_caching": true
},
"gpt-4-1106-vision-preview": {
"max_tokens": 4096,

@@ -278,7 +293,8 @@
"output_cost_per_token": 0.00003,
"litellm_provider": "openai",
"mode": "chat",
"supports_vision": true
"supports_vision": true,
"supports_prompt_caching": true
},
"gpt-3.5-turbo": {
"max_tokens": 4097,

@@ -288,7 +304,8 @@
"output_cost_per_token": 0.000002,
"litellm_provider": "openai",
"mode": "chat",
"supports_function_calling": true
"supports_function_calling": true,
"supports_prompt_caching": true
},
"gpt-3.5-turbo-0301": {
"max_tokens": 4097,

@@ -297,7 +314,8 @@
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_prompt_caching": true
},
"gpt-3.5-turbo-0613": {
"max_tokens": 4097,

@@ -307,7 +325,8 @@
"output_cost_per_token": 0.000002,
"litellm_provider": "openai",
"mode": "chat",
"supports_function_calling": true
"supports_function_calling": true,
"supports_prompt_caching": true
},
"gpt-3.5-turbo-1106": {
"max_tokens": 16385,

@@ -318,7 +337,8 @@
"litellm_provider": "openai",
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
"supports_parallel_function_calling": true,
"supports_prompt_caching": true
},
"gpt-3.5-turbo-0125": {
"max_tokens": 16385,

@@ -329,7 +349,8 @@
"litellm_provider": "openai",
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
"supports_parallel_function_calling": true,
"supports_prompt_caching": true
},
"gpt-3.5-turbo-16k": {
"max_tokens": 16385,

@@ -338,7 +359,8 @@
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000004,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_prompt_caching": true
},
"gpt-3.5-turbo-16k-0613": {
"max_tokens": 16385,

@@ -347,7 +369,8 @@
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000004,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_prompt_caching": true
},
"ft:gpt-3.5-turbo": {
"max_tokens": 4096,

@@ -1,4 +1,6 @@
import os, sys, traceback
import importlib.resources
import json

sys.path.insert(
    0, os.path.abspath("../..")

@@ -6,7 +8,18 @@ sys.path.insert(
import litellm
import pytest

try:
    print(litellm.get_model_cost_map(url="fake-url"))
except Exception as e:
    pytest.fail(f"An exception occurred: {e}")

def test_get_model_cost_map():
    try:
        print(litellm.get_model_cost_map(url="fake-url"))
    except Exception as e:
        pytest.fail(f"An exception occurred: {e}")


def test_get_backup_model_cost_map():
    with importlib.resources.open_text(
        "litellm", "model_prices_and_context_window_backup.json"
    ) as f:
        print("inside backup")
        content = json.load(f)
        print("content", content)

@@ -111,3 +111,11 @@ def test_prompt_caching_model(model):
    # assert (response.usage.cache_read_input_tokens > 0) or (
    #     response.usage.cache_creation_input_tokens > 0
    # )


def test_supports_prompt_caching():
    from litellm.utils import supports_prompt_caching

    supports_pc = supports_prompt_caching(model="anthropic/claude-3-5-sonnet-20240620")

    assert supports_pc