diff --git a/Dockerfile b/Dockerfile
index c8e9956b29..bd840eaf54 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -62,6 +62,11 @@ COPY --from=builder /wheels/ /wheels/
RUN pip install *.whl /wheels/* --no-index --find-links=/wheels/ && rm -f *.whl && rm -rf /wheels
# Generate prisma client
+ENV PRISMA_BINARY_CACHE_DIR=/app/prisma
+RUN mkdir -p /.cache
+RUN chmod -R 777 /.cache
+RUN pip install nodejs-bin
+RUN pip install prisma
RUN prisma generate
RUN chmod +x entrypoint.sh
diff --git a/Dockerfile.database b/Dockerfile.database
index 22084bab89..c995939e5b 100644
--- a/Dockerfile.database
+++ b/Dockerfile.database
@@ -62,6 +62,11 @@ RUN pip install PyJWT --no-cache-dir
RUN chmod +x build_admin_ui.sh && ./build_admin_ui.sh
# Generate prisma client
+ENV PRISMA_BINARY_CACHE_DIR=/app/prisma
+RUN mkdir -p /.cache
+RUN chmod -R 777 /.cache
+RUN pip install nodejs-bin
+RUN pip install prisma
RUN prisma generate
RUN chmod +x entrypoint.sh
diff --git a/docs/my-website/docs/providers/anthropic.md b/docs/my-website/docs/providers/anthropic.md
index 2227b7a6b5..2a7804bfda 100644
--- a/docs/my-website/docs/providers/anthropic.md
+++ b/docs/my-website/docs/providers/anthropic.md
@@ -225,22 +225,336 @@ print(response)
| claude-instant-1.2 | `completion('claude-instant-1.2', messages)` | `os.environ['ANTHROPIC_API_KEY']` |
| claude-instant-1 | `completion('claude-instant-1', messages)` | `os.environ['ANTHROPIC_API_KEY']` |
-## Passing Extra Headers to Anthropic API
+## **Prompt Caching**
-Pass `extra_headers: dict` to `litellm.completion`
+Use Anthropic Prompt Caching
-```python
-from litellm import completion
-messages = [{"role": "user", "content": "What is Anthropic?"}]
-response = completion(
- model="claude-3-5-sonnet-20240620",
- messages=messages,
- extra_headers={"anthropic-beta": "max-tokens-3-5-sonnet-2024-07-15"}
+
+[Relevant Anthropic API Docs](https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching)
+
+### Caching - Large Context Caching
+
+This example demonstrates basic Prompt Caching usage: the full text of a legal agreement is cached as a prefix, while the user instruction is left uncached.
+
+
+
+
+```python
+response = await litellm.acompletion(
+ model="anthropic/claude-3-5-sonnet-20240620",
+ messages=[
+ {
+ "role": "system",
+ "content": [
+ {
+ "type": "text",
+ "text": "You are an AI assistant tasked with analyzing legal documents.",
+ },
+ {
+ "type": "text",
+ "text": "Here is the full text of a complex legal agreement",
+ "cache_control": {"type": "ephemeral"},
+ },
+ ],
+ },
+ {
+ "role": "user",
+ "content": "what are the key terms and conditions in this agreement?",
+ },
+ ],
+ extra_headers={
+ "anthropic-version": "2023-06-01",
+ "anthropic-beta": "prompt-caching-2024-07-31",
+ },
+)
+
+```
+
+
+
+:::info
+
+LiteLLM Proxy is OpenAI compatible
+
+This example uses the OpenAI Python SDK to send a request to the LiteLLM Proxy.
+
+It assumes the model `anthropic/claude-3-5-sonnet-20240620` is defined in your [litellm proxy config.yaml](#usage-with-litellm-proxy).
+
+:::
+
+```python
+import openai
+client = openai.AsyncOpenAI(
+ api_key="anything", # litellm proxy api key
+ base_url="http://0.0.0.0:4000" # litellm proxy base url
+)
+
+
+response = await client.chat.completions.create(
+ model="anthropic/claude-3-5-sonnet-20240620",
+ messages=[
+ {
+ "role": "system",
+ "content": [
+ {
+ "type": "text",
+ "text": "You are an AI assistant tasked with analyzing legal documents.",
+ },
+ {
+ "type": "text",
+ "text": "Here is the full text of a complex legal agreement",
+ "cache_control": {"type": "ephemeral"},
+ },
+ ],
+ },
+ {
+ "role": "user",
+ "content": "what are the key terms and conditions in this agreement?",
+ },
+ ],
+ extra_headers={
+ "anthropic-version": "2023-06-01",
+ "anthropic-beta": "prompt-caching-2024-07-31",
+ },
+)
+
+```
+
+
+
+
+### Caching - Tool definitions
+
+In this example, we demonstrate caching tool definitions.
+
+The `cache_control` parameter is placed on the final tool definition.
+
+
+
+
+```python
+import litellm
+
+response = await litellm.acompletion(
+ model="anthropic/claude-3-5-sonnet-20240620",
+    messages=[{"role": "user", "content": "What's the weather like in Boston today?"}],
+    tools=[
+ {
+ "type": "function",
+ "function": {
+ "name": "get_current_weather",
+ "description": "Get the current weather in a given location",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "location": {
+ "type": "string",
+ "description": "The city and state, e.g. San Francisco, CA",
+ },
+ "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
+ },
+ "required": ["location"],
+ },
+ "cache_control": {"type": "ephemeral"}
+ },
+ }
+ ],
+ extra_headers={
+ "anthropic-version": "2023-06-01",
+ "anthropic-beta": "prompt-caching-2024-07-31",
+ },
)
```
-## Advanced
+
+
-## Usage - Function Calling
+:::info
+
+LiteLLM Proxy is OpenAI compatible
+
+This example uses the OpenAI Python SDK to send a request to the LiteLLM Proxy.
+
+It assumes the model `anthropic/claude-3-5-sonnet-20240620` is defined in your [litellm proxy config.yaml](#usage-with-litellm-proxy).
+
+:::
+
+```python
+import openai
+client = openai.AsyncOpenAI(
+ api_key="anything", # litellm proxy api key
+ base_url="http://0.0.0.0:4000" # litellm proxy base url
+)
+
+response = await client.chat.completions.create(
+ model="anthropic/claude-3-5-sonnet-20240620",
+    messages=[{"role": "user", "content": "What's the weather like in Boston today?"}],
+    tools=[
+ {
+ "type": "function",
+ "function": {
+ "name": "get_current_weather",
+ "description": "Get the current weather in a given location",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "location": {
+ "type": "string",
+ "description": "The city and state, e.g. San Francisco, CA",
+ },
+ "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
+ },
+ "required": ["location"],
+ },
+ "cache_control": {"type": "ephemeral"}
+ },
+ }
+ ],
+ extra_headers={
+ "anthropic-version": "2023-06-01",
+ "anthropic-beta": "prompt-caching-2024-07-31",
+ },
+)
+```
+
+
+
+
+
+### Caching - Continuing Multi-Turn Convo
+
+In this example, we demonstrate how to use Prompt Caching in a multi-turn conversation.
+
+The `cache_control` parameter is placed on the system message to designate it as part of the static prefix.
+
+The conversation history (previous messages) is included in the `messages` array. The second-to-last user message is marked for caching with the `cache_control` parameter, so this checkpoint can read from the previous cache. The final turn is also marked with `cache_control` so the response can be cached and reused in follow-up requests.
+
+
+
+
+```python
+import litellm
+
+response = await litellm.acompletion(
+ model="anthropic/claude-3-5-sonnet-20240620",
+ messages=[
+ # System Message
+ {
+ "role": "system",
+ "content": [
+ {
+ "type": "text",
+ "text": "Here is the full text of a complex legal agreement"
+ * 400,
+ "cache_control": {"type": "ephemeral"},
+ }
+ ],
+ },
+ # marked for caching with the cache_control parameter, so that this checkpoint can read from the previous cache.
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": "What are the key terms and conditions in this agreement?",
+ "cache_control": {"type": "ephemeral"},
+ }
+ ],
+ },
+ {
+ "role": "assistant",
+ "content": "Certainly! the key terms and conditions are the following: the contract is 1 year long for $10/mo",
+ },
+ # The final turn is marked with cache-control, for continuing in followups.
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": "What are the key terms and conditions in this agreement?",
+ "cache_control": {"type": "ephemeral"},
+ }
+ ],
+ },
+ ],
+ extra_headers={
+ "anthropic-version": "2023-06-01",
+ "anthropic-beta": "prompt-caching-2024-07-31",
+ },
+)
+```
+
+
+
+:::info
+
+LiteLLM Proxy is OpenAI compatible
+
+This example uses the OpenAI Python SDK to send a request to the LiteLLM Proxy.
+
+It assumes the model `anthropic/claude-3-5-sonnet-20240620` is defined in your [litellm proxy config.yaml](#usage-with-litellm-proxy).
+
+:::
+
+```python
+import openai
+client = openai.AsyncOpenAI(
+ api_key="anything", # litellm proxy api key
+ base_url="http://0.0.0.0:4000" # litellm proxy base url
+)
+
+response = await client.chat.completions.create(
+ model="anthropic/claude-3-5-sonnet-20240620",
+ messages=[
+ # System Message
+ {
+ "role": "system",
+ "content": [
+ {
+ "type": "text",
+ "text": "Here is the full text of a complex legal agreement"
+ * 400,
+ "cache_control": {"type": "ephemeral"},
+ }
+ ],
+ },
+ # marked for caching with the cache_control parameter, so that this checkpoint can read from the previous cache.
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": "What are the key terms and conditions in this agreement?",
+ "cache_control": {"type": "ephemeral"},
+ }
+ ],
+ },
+ {
+ "role": "assistant",
+ "content": "Certainly! the key terms and conditions are the following: the contract is 1 year long for $10/mo",
+ },
+ # The final turn is marked with cache-control, for continuing in followups.
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": "What are the key terms and conditions in this agreement?",
+ "cache_control": {"type": "ephemeral"},
+ }
+ ],
+ },
+ ],
+ extra_headers={
+ "anthropic-version": "2023-06-01",
+ "anthropic-beta": "prompt-caching-2024-07-31",
+ },
+)
+```
+
+
+
+
+## **Function/Tool Calling**
:::info
@@ -429,6 +743,20 @@ resp = litellm.completion(
print(f"\nResponse: {resp}")
```
+## **Passing Extra Headers to Anthropic API**
+
+Pass `extra_headers: dict` to `litellm.completion`
+
+```python
+from litellm import completion
+messages = [{"role": "user", "content": "What is Anthropic?"}]
+response = completion(
+ model="claude-3-5-sonnet-20240620",
+ messages=messages,
+ extra_headers={"anthropic-beta": "max-tokens-3-5-sonnet-2024-07-15"}
+)
+```
+
## Usage - "Assistant Pre-fill"
You can "put words in Claude's mouth" by including an `assistant` role message as the last item in the `messages` array.
diff --git a/docs/my-website/docs/proxy/model_management.md b/docs/my-website/docs/proxy/model_management.md
index 02ce4ba23b..a8cc66ae76 100644
--- a/docs/my-website/docs/proxy/model_management.md
+++ b/docs/my-website/docs/proxy/model_management.md
@@ -17,7 +17,7 @@ model_list:
## Get Model Information - `/model/info`
-Retrieve detailed information about each model listed in the `/model/info` endpoint, including descriptions from the `config.yaml` file, and additional model info (e.g. max tokens, cost per input token, etc.) pulled the model_info you set and the litellm model cost map. Sensitive details like API keys are excluded for security purposes.
+Retrieve detailed information about each model listed in the `/model/info` endpoint, including descriptions from the `config.yaml` file, and additional model info (e.g. max tokens, cost per input token, etc.) pulled from the model_info you set and the [litellm model cost map](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json). Sensitive details like API keys are excluded for security purposes.
-
+
+
```bash
curl -X POST "http://0.0.0.0:4000/model/new" \
- -H "accept: application/json" \
- -H "Content-Type: application/json" \
- -d '{ "model_name": "azure-gpt-turbo", "litellm_params": {"model": "azure/gpt-3.5-turbo", "api_key": "os.environ/AZURE_API_KEY", "api_base": "my-azure-api-base"} }'
+ -H "accept: application/json" \
+ -H "Content-Type: application/json" \
+ -d '{ "model_name": "azure-gpt-turbo", "litellm_params": {"model": "azure/gpt-3.5-turbo", "api_key": "os.environ/AZURE_API_KEY", "api_base": "my-azure-api-base"} }'
```
-
+
+
+
+```yaml
+model_list:
+ - model_name: gpt-3.5-turbo ### RECEIVED MODEL NAME ### `openai.chat.completions.create(model="gpt-3.5-turbo",...)`
+ litellm_params: # all params accepted by litellm.completion() - https://github.com/BerriAI/litellm/blob/9b46ec05b02d36d6e4fb5c32321e51e7f56e4a6e/litellm/types/router.py#L297
+ model: azure/gpt-turbo-small-eu ### MODEL NAME sent to `litellm.completion()` ###
+ api_base: https://my-endpoint-europe-berri-992.openai.azure.com/
+ api_key: "os.environ/AZURE_API_KEY_EU" # does os.getenv("AZURE_API_KEY_EU")
+ rpm: 6 # [OPTIONAL] Rate limit for this deployment: in requests per minute (rpm)
+ model_info:
+ my_custom_key: my_custom_value # additional model metadata
+```
+
+
@@ -85,4 +96,83 @@ Keep in mind that as both endpoints are in [BETA], you may need to visit the ass
- Get Model Information: [Issue #933](https://github.com/BerriAI/litellm/issues/933)
- Add a New Model: [Issue #964](https://github.com/BerriAI/litellm/issues/964)
-Feedback on the beta endpoints is valuable and helps improve the API for all users.
\ No newline at end of file
+Feedback on the beta endpoints is valuable and helps improve the API for all users.
+
+
+## Add Additional Model Information
+
+If you want to add a display name, description, labels, or other custom metadata for your models, use `model_info:`
+
+```yaml
+model_list:
+ - model_name: "gpt-4"
+ litellm_params:
+ model: "gpt-4"
+ api_key: "os.environ/OPENAI_API_KEY"
+ model_info: # 👈 KEY CHANGE
+ my_custom_key: "my_custom_value"
+```
+
+### Usage
+
+1. Add additional information to model
+
+```yaml
+model_list:
+ - model_name: "gpt-4"
+ litellm_params:
+ model: "gpt-4"
+ api_key: "os.environ/OPENAI_API_KEY"
+ model_info: # 👈 KEY CHANGE
+ my_custom_key: "my_custom_value"
+```
+
+2. Call with `/model/info`
+
+Use a key with access to the model `gpt-4`.
+
+```bash
+curl -L -X GET 'http://0.0.0.0:4000/v1/model/info' \
+-H 'Authorization: Bearer LITELLM_KEY' \
+```
+
+3. **Expected Response**
+
+The returned `model_info` is your custom `model_info` merged with the LiteLLM model info (if it exists).
+
+
+[**How LiteLLM Model Info is found**](https://github.com/BerriAI/litellm/blob/9b46ec05b02d36d6e4fb5c32321e51e7f56e4a6e/litellm/proxy/proxy_server.py#L7460)
+
+[Tell us how this can be improved!](https://github.com/BerriAI/litellm/issues)
+
+```bash
+{
+ "data": [
+ {
+ "model_name": "gpt-4",
+ "litellm_params": {
+ "model": "gpt-4"
+ },
+ "model_info": {
+ "id": "e889baacd17f591cce4c63639275ba5e8dc60765d6c553e6ee5a504b19e50ddc",
+ "db_model": false,
+ "my_custom_key": "my_custom_value", # 👈 CUSTOM INFO
+ "key": "gpt-4", # 👈 KEY in LiteLLM MODEL INFO/COST MAP - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json
+ "max_tokens": 4096,
+ "max_input_tokens": 8192,
+ "max_output_tokens": 4096,
+ "input_cost_per_token": 3e-05,
+ "input_cost_per_character": null,
+ "input_cost_per_token_above_128k_tokens": null,
+ "output_cost_per_token": 6e-05,
+ "output_cost_per_character": null,
+ "output_cost_per_token_above_128k_tokens": null,
+ "output_cost_per_character_above_128k_tokens": null,
+ "output_vector_size": null,
+ "litellm_provider": "openai",
+ "mode": "chat"
+ }
+ },
+ ]
+}
+```
diff --git a/docs/my-website/docs/proxy/prometheus.md b/docs/my-website/docs/proxy/prometheus.md
index 6c856f58b3..4b913d2e82 100644
--- a/docs/my-website/docs/proxy/prometheus.md
+++ b/docs/my-website/docs/proxy/prometheus.md
@@ -72,15 +72,15 @@ http://localhost:4000/metrics
| Metric Name | Description |
|----------------------|--------------------------------------|
-| `deployment_state` | The state of the deployment: 0 = healthy, 1 = partial outage, 2 = complete outage. |
+| `litellm_deployment_state` | The state of the deployment: 0 = healthy, 1 = partial outage, 2 = complete outage. |
| `litellm_remaining_requests_metric` | Track `x-ratelimit-remaining-requests` returned from LLM API Deployment |
| `litellm_remaining_tokens` | Track `x-ratelimit-remaining-tokens` return from LLM API Deployment |
- `llm_deployment_success_responses` | Total number of successful LLM API calls for deployment |
-| `llm_deployment_failure_responses` | Total number of failed LLM API calls for deployment |
-| `llm_deployment_total_requests` | Total number of LLM API calls for deployment - success + failure |
-| `llm_deployment_latency_per_output_token` | Latency per output token for deployment |
-| `llm_deployment_successful_fallbacks` | Number of successful fallback requests from primary model -> fallback model |
-| `llm_deployment_failed_fallbacks` | Number of failed fallback requests from primary model -> fallback model |
+| `litellm_deployment_success_responses` | Total number of successful LLM API calls for deployment |
+| `litellm_deployment_failure_responses` | Total number of failed LLM API calls for deployment |
+| `litellm_deployment_total_requests` | Total number of LLM API calls for deployment - success + failure |
+| `litellm_deployment_latency_per_output_token` | Latency per output token for deployment |
+| `litellm_deployment_successful_fallbacks` | Number of successful fallback requests from primary model -> fallback model |
+| `litellm_deployment_failed_fallbacks` | Number of failed fallback requests from primary model -> fallback model |
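+
+To confirm the renamed `litellm_`-prefixed metrics are exposed after upgrading, you can inspect the `/metrics` endpoint. Below is a minimal sketch using the `requests` library; it assumes the proxy is running locally on port 4000 with the `prometheus` callback enabled.
+
+```python
+import requests
+
+# Fetch the raw Prometheus exposition text from the LiteLLM proxy /metrics endpoint
+metrics_text = requests.get("http://localhost:4000/metrics").text
+
+renamed_metrics = [
+    "litellm_deployment_state",
+    "litellm_deployment_success_responses",
+    "litellm_deployment_failure_responses",
+    "litellm_deployment_total_requests",
+    "litellm_deployment_latency_per_output_token",
+    "litellm_deployment_successful_fallbacks",
+    "litellm_deployment_failed_fallbacks",
+]
+
+for name in renamed_metrics:
+    # A name may be missing if the prometheus callback isn't enabled or the proxy predates the rename
+    print(name, "present" if name in metrics_text else "not found")
+```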
diff --git a/litellm/integrations/gcs_bucket.py b/litellm/integrations/gcs_bucket.py
index 46f55f8f01..be7f8e39c2 100644
--- a/litellm/integrations/gcs_bucket.py
+++ b/litellm/integrations/gcs_bucket.py
@@ -1,5 +1,6 @@
import json
import os
+import uuid
from datetime import datetime
from typing import Any, Dict, List, Optional, TypedDict, Union
@@ -29,6 +30,8 @@ class GCSBucketPayload(TypedDict):
end_time: str
response_cost: Optional[float]
spend_log_metadata: str
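+    # Populated by the success/failure handlers: stringified exception (failures only)
+    # and the event type ("successful_api_call" / "failed_api_call")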
+ exception: Optional[str]
+ log_event_type: Optional[str]
class GCSBucketLogger(CustomLogger):
@@ -79,6 +82,7 @@ class GCSBucketLogger(CustomLogger):
logging_payload: GCSBucketPayload = await self.get_gcs_payload(
kwargs, response_obj, start_time_str, end_time_str
)
+ logging_payload["log_event_type"] = "successful_api_call"
json_logged_payload = json.dumps(logging_payload)
@@ -103,7 +107,56 @@ class GCSBucketLogger(CustomLogger):
verbose_logger.error("GCS Bucket logging error: %s", str(e))
async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
- pass
+ from litellm.proxy.proxy_server import premium_user
+
+ if premium_user is not True:
+ raise ValueError(
+ f"GCS Bucket logging is a premium feature. Please upgrade to use it. {CommonProxyErrors.not_premium_user.value}"
+ )
+ try:
+ verbose_logger.debug(
+ "GCS Logger: async_log_failure_event logging kwargs: %s, response_obj: %s",
+ kwargs,
+ response_obj,
+ )
+
+ start_time_str = start_time.strftime("%Y-%m-%d %H:%M:%S")
+ end_time_str = end_time.strftime("%Y-%m-%d %H:%M:%S")
+ headers = await self.construct_request_headers()
+
+ logging_payload: GCSBucketPayload = await self.get_gcs_payload(
+ kwargs, response_obj, start_time_str, end_time_str
+ )
+ logging_payload["log_event_type"] = "failed_api_call"
+
+ _litellm_params = kwargs.get("litellm_params") or {}
+ metadata = _litellm_params.get("metadata") or {}
+
+ json_logged_payload = json.dumps(logging_payload)
+
+ # Get the current date
+ current_date = datetime.now().strftime("%Y-%m-%d")
+
+ # Modify the object_name to include the date-based folder
+ object_name = f"{current_date}/failure-{uuid.uuid4().hex}"
+
+ if "gcs_log_id" in metadata:
+ object_name = metadata["gcs_log_id"]
+
+ response = await self.async_httpx_client.post(
+ headers=headers,
+ url=f"https://storage.googleapis.com/upload/storage/v1/b/{self.BUCKET_NAME}/o?uploadType=media&name={object_name}",
+ data=json_logged_payload,
+ )
+
+ if response.status_code != 200:
+ verbose_logger.error("GCS Bucket logging error: %s", str(response.text))
+
+ verbose_logger.debug("GCS Bucket response %s", response)
+ verbose_logger.debug("GCS Bucket status code %s", response.status_code)
+ verbose_logger.debug("GCS Bucket response.text %s", response.text)
+ except Exception as e:
+ verbose_logger.error("GCS Bucket logging error: %s", str(e))
async def construct_request_headers(self) -> Dict[str, str]:
from litellm import vertex_chat_completion
@@ -139,9 +192,18 @@ class GCSBucketLogger(CustomLogger):
optional_params=kwargs.get("optional_params", None),
)
response_dict = {}
- response_dict = convert_litellm_response_object_to_dict(
- response_obj=response_obj
- )
+ if response_obj:
+ response_dict = convert_litellm_response_object_to_dict(
+ response_obj=response_obj
+ )
+
+ exception_str = None
+
+ # Handle logging exception attributes
+ if "exception" in kwargs:
+ exception_str = kwargs.get("exception", "")
+ if not isinstance(exception_str, str):
+ exception_str = str(exception_str)
_spend_log_payload: SpendLogsPayload = get_logging_payload(
kwargs=kwargs,
@@ -156,8 +218,10 @@ class GCSBucketLogger(CustomLogger):
response_obj=response_dict,
start_time=start_time,
end_time=end_time,
- spend_log_metadata=_spend_log_payload["metadata"],
+ spend_log_metadata=_spend_log_payload.get("metadata", ""),
response_cost=kwargs.get("response_cost", None),
+ exception=exception_str,
+ log_event_type=None,
)
return gcs_payload
diff --git a/litellm/integrations/prometheus.py b/litellm/integrations/prometheus.py
index 8797807ac6..08431fd7af 100644
--- a/litellm/integrations/prometheus.py
+++ b/litellm/integrations/prometheus.py
@@ -141,42 +141,42 @@ class PrometheusLogger(CustomLogger):
]
# Metric for deployment state
- self.deployment_state = Gauge(
- "deployment_state",
+ self.litellm_deployment_state = Gauge(
+ "litellm_deployment_state",
"LLM Deployment Analytics - The state of the deployment: 0 = healthy, 1 = partial outage, 2 = complete outage",
labelnames=_logged_llm_labels,
)
- self.llm_deployment_success_responses = Counter(
- name="llm_deployment_success_responses",
+ self.litellm_deployment_success_responses = Counter(
+ name="litellm_deployment_success_responses",
documentation="LLM Deployment Analytics - Total number of successful LLM API calls via litellm",
labelnames=_logged_llm_labels,
)
- self.llm_deployment_failure_responses = Counter(
- name="llm_deployment_failure_responses",
+ self.litellm_deployment_failure_responses = Counter(
+ name="litellm_deployment_failure_responses",
documentation="LLM Deployment Analytics - Total number of failed LLM API calls via litellm",
labelnames=_logged_llm_labels,
)
- self.llm_deployment_total_requests = Counter(
- name="llm_deployment_total_requests",
+ self.litellm_deployment_total_requests = Counter(
+ name="litellm_deployment_total_requests",
documentation="LLM Deployment Analytics - Total number of LLM API calls via litellm - success + failure",
labelnames=_logged_llm_labels,
)
# Deployment Latency tracking
- self.llm_deployment_latency_per_output_token = Histogram(
- name="llm_deployment_latency_per_output_token",
+ self.litellm_deployment_latency_per_output_token = Histogram(
+ name="litellm_deployment_latency_per_output_token",
documentation="LLM Deployment Analytics - Latency per output token",
labelnames=_logged_llm_labels,
)
- self.llm_deployment_successful_fallbacks = Counter(
- "llm_deployment_successful_fallbacks",
+ self.litellm_deployment_successful_fallbacks = Counter(
+ "litellm_deployment_successful_fallbacks",
"LLM Deployment Analytics - Number of successful fallback requests from primary model -> fallback model",
["primary_model", "fallback_model"],
)
- self.llm_deployment_failed_fallbacks = Counter(
- "llm_deployment_failed_fallbacks",
+ self.litellm_deployment_failed_fallbacks = Counter(
+ "litellm_deployment_failed_fallbacks",
"LLM Deployment Analytics - Number of failed fallback requests from primary model -> fallback model",
["primary_model", "fallback_model"],
)
@@ -358,14 +358,14 @@ class PrometheusLogger(CustomLogger):
api_provider=llm_provider,
)
- self.llm_deployment_failure_responses.labels(
+ self.litellm_deployment_failure_responses.labels(
litellm_model_name=litellm_model_name,
model_id=model_id,
api_base=api_base,
api_provider=llm_provider,
).inc()
- self.llm_deployment_total_requests.labels(
+ self.litellm_deployment_total_requests.labels(
litellm_model_name=litellm_model_name,
model_id=model_id,
api_base=api_base,
@@ -438,14 +438,14 @@ class PrometheusLogger(CustomLogger):
api_provider=llm_provider,
)
- self.llm_deployment_success_responses.labels(
+ self.litellm_deployment_success_responses.labels(
litellm_model_name=litellm_model_name,
model_id=model_id,
api_base=api_base,
api_provider=llm_provider,
).inc()
- self.llm_deployment_total_requests.labels(
+ self.litellm_deployment_total_requests.labels(
litellm_model_name=litellm_model_name,
model_id=model_id,
api_base=api_base,
@@ -475,7 +475,7 @@ class PrometheusLogger(CustomLogger):
latency_per_token = None
if output_tokens is not None and output_tokens > 0:
latency_per_token = _latency_seconds / output_tokens
- self.llm_deployment_latency_per_output_token.labels(
+ self.litellm_deployment_latency_per_output_token.labels(
litellm_model_name=litellm_model_name,
model_id=model_id,
api_base=api_base,
@@ -497,7 +497,7 @@ class PrometheusLogger(CustomLogger):
kwargs,
)
_new_model = kwargs.get("model")
- self.llm_deployment_successful_fallbacks.labels(
+ self.litellm_deployment_successful_fallbacks.labels(
primary_model=original_model_group, fallback_model=_new_model
).inc()
@@ -508,11 +508,11 @@ class PrometheusLogger(CustomLogger):
kwargs,
)
_new_model = kwargs.get("model")
- self.llm_deployment_failed_fallbacks.labels(
+ self.litellm_deployment_failed_fallbacks.labels(
primary_model=original_model_group, fallback_model=_new_model
).inc()
- def set_deployment_state(
+ def set_litellm_deployment_state(
self,
state: int,
litellm_model_name: str,
@@ -520,7 +520,7 @@ class PrometheusLogger(CustomLogger):
api_base: str,
api_provider: str,
):
- self.deployment_state.labels(
+ self.litellm_deployment_state.labels(
litellm_model_name, model_id, api_base, api_provider
).set(state)
@@ -531,7 +531,7 @@ class PrometheusLogger(CustomLogger):
api_base: str,
api_provider: str,
):
- self.set_deployment_state(
+ self.set_litellm_deployment_state(
0, litellm_model_name, model_id, api_base, api_provider
)
@@ -542,7 +542,7 @@ class PrometheusLogger(CustomLogger):
api_base: str,
api_provider: str,
):
- self.set_deployment_state(
+ self.set_litellm_deployment_state(
1, litellm_model_name, model_id, api_base, api_provider
)
@@ -553,7 +553,7 @@ class PrometheusLogger(CustomLogger):
api_base: str,
api_provider: str,
):
- self.set_deployment_state(
+ self.set_litellm_deployment_state(
2, litellm_model_name, model_id, api_base, api_provider
)
diff --git a/litellm/integrations/prometheus_helpers/prometheus_api.py b/litellm/integrations/prometheus_helpers/prometheus_api.py
index 86764df7dd..13ccc15620 100644
--- a/litellm/integrations/prometheus_helpers/prometheus_api.py
+++ b/litellm/integrations/prometheus_helpers/prometheus_api.py
@@ -41,8 +41,8 @@ async def get_fallback_metric_from_prometheus():
"""
response_message = ""
relevant_metrics = [
- "llm_deployment_successful_fallbacks_total",
- "llm_deployment_failed_fallbacks_total",
+ "litellm_deployment_successful_fallbacks_total",
+ "litellm_deployment_failed_fallbacks_total",
]
for metric in relevant_metrics:
response_json = await get_metric_from_prometheus(
diff --git a/litellm/llms/anthropic.py b/litellm/llms/anthropic.py
index 6f05aa226e..cf58163461 100644
--- a/litellm/llms/anthropic.py
+++ b/litellm/llms/anthropic.py
@@ -35,6 +35,7 @@ from litellm.types.llms.anthropic import (
AnthropicResponseContentBlockText,
AnthropicResponseContentBlockToolUse,
AnthropicResponseUsageBlock,
+ AnthropicSystemMessageContent,
ContentBlockDelta,
ContentBlockStart,
ContentBlockStop,
@@ -759,6 +760,7 @@ class AnthropicChatCompletion(BaseLLM):
## CALCULATING USAGE
prompt_tokens = completion_response["usage"]["input_tokens"]
completion_tokens = completion_response["usage"]["output_tokens"]
+ _usage = completion_response["usage"]
total_tokens = prompt_tokens + completion_tokens
model_response.created = int(time.time())
@@ -768,6 +770,11 @@ class AnthropicChatCompletion(BaseLLM):
completion_tokens=completion_tokens,
total_tokens=total_tokens,
)
+
+ if "cache_creation_input_tokens" in _usage:
+ usage["cache_creation_input_tokens"] = _usage["cache_creation_input_tokens"]
+ if "cache_read_input_tokens" in _usage:
+ usage["cache_read_input_tokens"] = _usage["cache_read_input_tokens"]
setattr(model_response, "usage", usage) # type: ignore
return model_response
@@ -901,6 +908,7 @@ class AnthropicChatCompletion(BaseLLM):
# Separate system prompt from rest of message
system_prompt_indices = []
system_prompt = ""
+ anthropic_system_message_list = None
for idx, message in enumerate(messages):
if message["role"] == "system":
valid_content: bool = False
@@ -908,8 +916,23 @@ class AnthropicChatCompletion(BaseLLM):
system_prompt += message["content"]
valid_content = True
elif isinstance(message["content"], list):
- for content in message["content"]:
- system_prompt += content.get("text", "")
+ for _content in message["content"]:
+ anthropic_system_message_content = (
+ AnthropicSystemMessageContent(
+ type=_content.get("type"),
+ text=_content.get("text"),
+ )
+ )
+ if "cache_control" in _content:
+ anthropic_system_message_content["cache_control"] = (
+ _content["cache_control"]
+ )
+
+ if anthropic_system_message_list is None:
+ anthropic_system_message_list = []
+ anthropic_system_message_list.append(
+ anthropic_system_message_content
+ )
valid_content = True
if valid_content:
@@ -919,6 +942,10 @@ class AnthropicChatCompletion(BaseLLM):
messages.pop(idx)
if len(system_prompt) > 0:
optional_params["system"] = system_prompt
+
+ # Handling anthropic API Prompt Caching
+ if anthropic_system_message_list is not None:
+ optional_params["system"] = anthropic_system_message_list
# Format rest of message according to anthropic guidelines
try:
messages = prompt_factory(
@@ -954,6 +981,8 @@ class AnthropicChatCompletion(BaseLLM):
else: # assume openai tool call
new_tool = tool["function"]
new_tool["input_schema"] = new_tool.pop("parameters") # rename key
+ if "cache_control" in tool:
+ new_tool["cache_control"] = tool["cache_control"]
anthropic_tools.append(new_tool)
optional_params["tools"] = anthropic_tools
diff --git a/litellm/llms/ollama_chat.py b/litellm/llms/ollama_chat.py
index b0dd5d905a..ea84fa95cf 100644
--- a/litellm/llms/ollama_chat.py
+++ b/litellm/llms/ollama_chat.py
@@ -356,6 +356,7 @@ def ollama_completion_stream(url, api_key, data, logging_obj):
"json": data,
"method": "POST",
"timeout": litellm.request_timeout,
+ "follow_redirects": True
}
if api_key is not None:
_request["headers"] = {"Authorization": "Bearer {}".format(api_key)}
diff --git a/litellm/llms/prompt_templates/factory.py b/litellm/llms/prompt_templates/factory.py
index 7c3c7e80fb..f81515e98d 100644
--- a/litellm/llms/prompt_templates/factory.py
+++ b/litellm/llms/prompt_templates/factory.py
@@ -1224,6 +1224,19 @@ def convert_to_anthropic_tool_invoke(
return anthropic_tool_invoke
+def add_cache_control_to_content(
+ anthropic_content_element: Union[
+ dict, AnthropicMessagesImageParam, AnthropicMessagesTextParam
+ ],
+ orignal_content_element: dict,
+):
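+    """
+    Copy the `cache_control` field from the original OpenAI content block
+    onto the converted Anthropic content block, if it is set.
+    """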
+ if "cache_control" in orignal_content_element:
+ anthropic_content_element["cache_control"] = orignal_content_element[
+ "cache_control"
+ ]
+ return anthropic_content_element
+
+
def anthropic_messages_pt(
messages: list,
model: str,
@@ -1264,18 +1277,31 @@ def anthropic_messages_pt(
image_chunk = convert_to_anthropic_image_obj(
m["image_url"]["url"]
)
- user_content.append(
- AnthropicMessagesImageParam(
- type="image",
- source=AnthropicImageParamSource(
- type="base64",
- media_type=image_chunk["media_type"],
- data=image_chunk["data"],
- ),
- )
+
+ _anthropic_content_element = AnthropicMessagesImageParam(
+ type="image",
+ source=AnthropicImageParamSource(
+ type="base64",
+ media_type=image_chunk["media_type"],
+ data=image_chunk["data"],
+ ),
)
+
+ anthropic_content_element = add_cache_control_to_content(
+ anthropic_content_element=_anthropic_content_element,
+ orignal_content_element=m,
+ )
+ user_content.append(anthropic_content_element)
elif m.get("type", "") == "text":
- user_content.append({"type": "text", "text": m["text"]})
+ _anthropic_text_content_element = {
+ "type": "text",
+ "text": m["text"],
+ }
+ anthropic_content_element = add_cache_control_to_content(
+ anthropic_content_element=_anthropic_text_content_element,
+ orignal_content_element=m,
+ )
+ user_content.append(anthropic_content_element)
elif (
messages[msg_i]["role"] == "tool"
or messages[msg_i]["role"] == "function"
@@ -1306,6 +1332,10 @@ def anthropic_messages_pt(
anthropic_message = AnthropicMessagesTextParam(
type="text", text=m.get("text")
)
+ anthropic_message = add_cache_control_to_content(
+ anthropic_content_element=anthropic_message,
+ orignal_content_element=m,
+ )
assistant_content.append(anthropic_message)
elif (
"content" in messages[msg_i]
@@ -1313,9 +1343,17 @@ def anthropic_messages_pt(
and len(messages[msg_i]["content"])
> 0 # don't pass empty text blocks. anthropic api raises errors.
):
- assistant_content.append(
- {"type": "text", "text": messages[msg_i]["content"]}
+
+ _anthropic_text_content_element = {
+ "type": "text",
+ "text": messages[msg_i]["content"],
+ }
+
+ anthropic_content_element = add_cache_control_to_content(
+ anthropic_content_element=_anthropic_text_content_element,
+ orignal_content_element=messages[msg_i],
)
+ assistant_content.append(anthropic_content_element)
if messages[msg_i].get(
"tool_calls", []
@@ -1701,12 +1739,14 @@ def cohere_messages_pt_v2(
assistant_tool_calls: List[ToolCallObject] = []
## MERGE CONSECUTIVE ASSISTANT CONTENT ##
while msg_i < len(messages) and messages[msg_i]["role"] == "assistant":
- assistant_text = (
- messages[msg_i].get("content") or ""
- ) # either string or none
- if assistant_text:
- assistant_content += assistant_text
-
+ if isinstance(messages[msg_i]["content"], list):
+ for m in messages[msg_i]["content"]:
+ if m.get("type", "") == "text":
+ assistant_content += m["text"]
+ elif messages[msg_i].get("content") is not None and isinstance(
+ messages[msg_i]["content"], str
+ ):
+ assistant_content += messages[msg_i]["content"]
if messages[msg_i].get(
"tool_calls", []
): # support assistant tool invoke conversion
diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index e31e6b3f4f..d30270c5c8 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -2074,7 +2074,8 @@
"litellm_provider": "vertex_ai-anthropic_models",
"mode": "chat",
"supports_function_calling": true,
- "supports_vision": true
+ "supports_vision": true,
+ "supports_assistant_prefill": true
},
"vertex_ai/claude-3-5-sonnet@20240620": {
"max_tokens": 4096,
@@ -2085,7 +2086,8 @@
"litellm_provider": "vertex_ai-anthropic_models",
"mode": "chat",
"supports_function_calling": true,
- "supports_vision": true
+ "supports_vision": true,
+ "supports_assistant_prefill": true
},
"vertex_ai/claude-3-haiku@20240307": {
"max_tokens": 4096,
@@ -2096,7 +2098,8 @@
"litellm_provider": "vertex_ai-anthropic_models",
"mode": "chat",
"supports_function_calling": true,
- "supports_vision": true
+ "supports_vision": true,
+ "supports_assistant_prefill": true
},
"vertex_ai/claude-3-opus@20240229": {
"max_tokens": 4096,
@@ -2107,7 +2110,8 @@
"litellm_provider": "vertex_ai-anthropic_models",
"mode": "chat",
"supports_function_calling": true,
- "supports_vision": true
+ "supports_vision": true,
+ "supports_assistant_prefill": true
},
"vertex_ai/meta/llama3-405b-instruct-maas": {
"max_tokens": 32000,
@@ -4531,6 +4535,69 @@
"litellm_provider": "perplexity",
"mode": "chat"
},
+ "perplexity/llama-3.1-70b-instruct": {
+ "max_tokens": 131072,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 131072,
+ "input_cost_per_token": 0.000001,
+ "output_cost_per_token": 0.000001,
+ "litellm_provider": "perplexity",
+ "mode": "chat"
+ },
+ "perplexity/llama-3.1-8b-instruct": {
+ "max_tokens": 131072,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 131072,
+ "input_cost_per_token": 0.0000002,
+ "output_cost_per_token": 0.0000002,
+ "litellm_provider": "perplexity",
+ "mode": "chat"
+ },
+ "perplexity/llama-3.1-sonar-huge-128k-online": {
+ "max_tokens": 127072,
+ "max_input_tokens": 127072,
+ "max_output_tokens": 127072,
+ "input_cost_per_token": 0.000005,
+ "output_cost_per_token": 0.000005,
+ "litellm_provider": "perplexity",
+ "mode": "chat"
+ },
+ "perplexity/llama-3.1-sonar-large-128k-online": {
+ "max_tokens": 127072,
+ "max_input_tokens": 127072,
+ "max_output_tokens": 127072,
+ "input_cost_per_token": 0.000001,
+ "output_cost_per_token": 0.000001,
+ "litellm_provider": "perplexity",
+ "mode": "chat"
+ },
+ "perplexity/llama-3.1-sonar-large-128k-chat": {
+ "max_tokens": 131072,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 131072,
+ "input_cost_per_token": 0.000001,
+ "output_cost_per_token": 0.000001,
+ "litellm_provider": "perplexity",
+ "mode": "chat"
+ },
+ "perplexity/llama-3.1-sonar-small-128k-chat": {
+ "max_tokens": 131072,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 131072,
+ "input_cost_per_token": 0.0000002,
+ "output_cost_per_token": 0.0000002,
+ "litellm_provider": "perplexity",
+ "mode": "chat"
+ },
+ "perplexity/llama-3.1-sonar-small-128k-online": {
+ "max_tokens": 127072,
+ "max_input_tokens": 127072,
+ "max_output_tokens": 127072,
+ "input_cost_per_token": 0.0000002,
+ "output_cost_per_token": 0.0000002,
+ "litellm_provider": "perplexity",
+ "mode": "chat"
+ },
"perplexity/pplx-7b-chat": {
"max_tokens": 8192,
"max_input_tokens": 8192,
diff --git a/litellm/proxy/_experimental/out/404.html b/litellm/proxy/_experimental/out/404.html
deleted file mode 100644
index 0de1d45fae..0000000000
--- a/litellm/proxy/_experimental/out/404.html
+++ /dev/null
@@ -1 +0,0 @@
-
404: This page could not be found.LiteLLM Dashboard404
This page could not be found.
\ No newline at end of file
diff --git a/litellm/proxy/_experimental/out/model_hub.html b/litellm/proxy/_experimental/out/model_hub.html
deleted file mode 100644
index 2476ecba73..0000000000
--- a/litellm/proxy/_experimental/out/model_hub.html
+++ /dev/null
@@ -1 +0,0 @@
-LiteLLM Dashboard
\ No newline at end of file
diff --git a/litellm/proxy/_experimental/out/onboarding.html b/litellm/proxy/_experimental/out/onboarding.html
deleted file mode 100644
index 0ea4969e32..0000000000
--- a/litellm/proxy/_experimental/out/onboarding.html
+++ /dev/null
@@ -1 +0,0 @@
-LiteLLM Dashboard
\ No newline at end of file
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index 41b2a66c01..dfa5c16520 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -1,7 +1,6 @@
model_list:
- - model_name: azure-embedding-model
+ - model_name: "gpt-4"
litellm_params:
- model: azure/azure-embedding-model
- api_base: os.environ/AZURE_API_BASE
- api_key: os.environ/AZURE_API_KEY
- api_version: "2023-07-01-preview"
+ model: "gpt-4"
+ model_info:
+ my_custom_key: "my_custom_value"
\ No newline at end of file
diff --git a/litellm/proxy/auth/user_api_key_auth.py b/litellm/proxy/auth/user_api_key_auth.py
index 7ed45bb51a..00e78f64e6 100644
--- a/litellm/proxy/auth/user_api_key_auth.py
+++ b/litellm/proxy/auth/user_api_key_auth.py
@@ -85,6 +85,8 @@ def _get_bearer_token(
):
if api_key.startswith("Bearer "): # ensure Bearer token passed in
api_key = api_key.replace("Bearer ", "") # extract the token
+ elif api_key.startswith("Basic "):
+ api_key = api_key.replace("Basic ", "") # handle langfuse input
else:
api_key = ""
return api_key
@@ -138,7 +140,6 @@ async def user_api_key_auth(
pass_through_endpoints: Optional[List[dict]] = general_settings.get(
"pass_through_endpoints", None
)
-
if isinstance(api_key, str):
passed_in_key = api_key
api_key = _get_bearer_token(api_key=api_key)
@@ -367,6 +368,40 @@ async def user_api_key_auth(
parent_otel_span=parent_otel_span,
)
#### ELSE ####
+
+ ## CHECK PASS-THROUGH ENDPOINTS ##
+ if pass_through_endpoints is not None:
+ for endpoint in pass_through_endpoints:
+ if endpoint.get("path", "") == route:
+ ## IF AUTH DISABLED
+ if endpoint.get("auth") is not True:
+ return UserAPIKeyAuth()
+ ## IF AUTH ENABLED
+ ### IF CUSTOM PARSER REQUIRED
+ if (
+ endpoint.get("custom_auth_parser") is not None
+ and endpoint.get("custom_auth_parser") == "langfuse"
+ ):
+ """
+ - langfuse returns {'Authorization': 'Basic YW55dGhpbmc6YW55dGhpbmc'}
+ - check the langfuse public key if it contains the litellm api key
+ """
+ import base64
+
+ api_key = api_key.replace("Basic ", "").strip()
+ decoded_bytes = base64.b64decode(api_key)
+ decoded_str = decoded_bytes.decode("utf-8")
+ api_key = decoded_str.split(":")[0]
+ else:
+ headers = endpoint.get("headers", None)
+ if headers is not None:
+ header_key = headers.get("litellm_user_api_key", "")
+ if (
+ isinstance(request.headers, dict)
+ and request.headers.get(key=header_key) is not None
+ ):
+ api_key = request.headers.get(key=header_key)
+
if master_key is None:
if isinstance(api_key, str):
return UserAPIKeyAuth(
@@ -533,7 +568,11 @@ async def user_api_key_auth(
if isinstance(
api_key, str
): # if generated token, make sure it starts with sk-.
- assert api_key.startswith("sk-") # prevent token hashes from being used
+ assert api_key.startswith(
+ "sk-"
+ ), "LiteLLM Virtual Key expected. Received={}, expected to start with 'sk-'.".format(
+ api_key
+ ) # prevent token hashes from being used
else:
verbose_logger.warning(
"litellm.proxy.proxy_server.user_api_key_auth(): Warning - Key={} is not a string.".format(
diff --git a/litellm/proxy/litellm_pre_call_utils.py b/litellm/proxy/litellm_pre_call_utils.py
index 990cb52337..9b896f66c2 100644
--- a/litellm/proxy/litellm_pre_call_utils.py
+++ b/litellm/proxy/litellm_pre_call_utils.py
@@ -5,7 +5,12 @@ from fastapi import Request
import litellm
from litellm._logging import verbose_logger, verbose_proxy_logger
-from litellm.proxy._types import CommonProxyErrors, TeamCallbackMetadata, UserAPIKeyAuth
+from litellm.proxy._types import (
+ AddTeamCallback,
+ CommonProxyErrors,
+ TeamCallbackMetadata,
+ UserAPIKeyAuth,
+)
from litellm.types.utils import SupportedCacheControls
if TYPE_CHECKING:
@@ -59,6 +64,42 @@ def safe_add_api_version_from_query_params(data: dict, request: Request):
verbose_logger.error("error checking api version in query params: %s", str(e))
+def convert_key_logging_metadata_to_callback(
+ data: AddTeamCallback, team_callback_settings_obj: Optional[TeamCallbackMetadata]
+) -> TeamCallbackMetadata:
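+    """
+    Convert a key-level logging setting (an entry from the key's metadata["logging"] list)
+    into TeamCallbackMetadata: register the callback as a success and/or failure callback
+    and resolve its callback_vars via litellm.get_secret().
+    """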
+ if team_callback_settings_obj is None:
+ team_callback_settings_obj = TeamCallbackMetadata()
+ if data.callback_type == "success":
+ if team_callback_settings_obj.success_callback is None:
+ team_callback_settings_obj.success_callback = []
+
+ if data.callback_name not in team_callback_settings_obj.success_callback:
+ team_callback_settings_obj.success_callback.append(data.callback_name)
+ elif data.callback_type == "failure":
+ if team_callback_settings_obj.failure_callback is None:
+ team_callback_settings_obj.failure_callback = []
+
+ if data.callback_name not in team_callback_settings_obj.failure_callback:
+ team_callback_settings_obj.failure_callback.append(data.callback_name)
+ elif data.callback_type == "success_and_failure":
+ if team_callback_settings_obj.success_callback is None:
+ team_callback_settings_obj.success_callback = []
+ if team_callback_settings_obj.failure_callback is None:
+ team_callback_settings_obj.failure_callback = []
+ if data.callback_name not in team_callback_settings_obj.success_callback:
+ team_callback_settings_obj.success_callback.append(data.callback_name)
+
+        if data.callback_name not in team_callback_settings_obj.failure_callback:
+ team_callback_settings_obj.failure_callback.append(data.callback_name)
+
+ for var, value in data.callback_vars.items():
+ if team_callback_settings_obj.callback_vars is None:
+ team_callback_settings_obj.callback_vars = {}
+ team_callback_settings_obj.callback_vars[var] = litellm.get_secret(value)
+
+ return team_callback_settings_obj
+
+
async def add_litellm_data_to_request(
data: dict,
request: Request,
@@ -224,6 +265,7 @@ async def add_litellm_data_to_request(
} # add the team-specific configs to the completion call
# Team Callbacks controls
+ callback_settings_obj: Optional[TeamCallbackMetadata] = None
if user_api_key_dict.team_metadata is not None:
team_metadata = user_api_key_dict.team_metadata
if "callback_settings" in team_metadata:
@@ -241,13 +283,25 @@ async def add_litellm_data_to_request(
}
}
"""
- data["success_callback"] = callback_settings_obj.success_callback
- data["failure_callback"] = callback_settings_obj.failure_callback
+ elif (
+ user_api_key_dict.metadata is not None
+ and "logging" in user_api_key_dict.metadata
+ ):
+ for item in user_api_key_dict.metadata["logging"]:
- if callback_settings_obj.callback_vars is not None:
- # unpack callback_vars in data
- for k, v in callback_settings_obj.callback_vars.items():
- data[k] = v
+ callback_settings_obj = convert_key_logging_metadata_to_callback(
+ data=AddTeamCallback(**item),
+ team_callback_settings_obj=callback_settings_obj,
+ )
+
+ if callback_settings_obj is not None:
+ data["success_callback"] = callback_settings_obj.success_callback
+ data["failure_callback"] = callback_settings_obj.failure_callback
+
+ if callback_settings_obj.callback_vars is not None:
+ # unpack callback_vars in data
+ for k, v in callback_settings_obj.callback_vars.items():
+ data[k] = v
return data
diff --git a/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py b/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py
index d71863497f..15129854a3 100644
--- a/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py
+++ b/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py
@@ -309,7 +309,7 @@ async def pass_through_request(
json=_parsed_body,
)
- if response.status_code != 200:
+ if response.status_code >= 300:
raise HTTPException(status_code=response.status_code, detail=response.text)
content = await response.aread()
diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml
index 660c27f249..4a1fc84a80 100644
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -39,7 +39,4 @@ general_settings:
litellm_settings:
fallbacks: [{"gemini-1.5-pro-001": ["gpt-4o"]}]
- success_callback: ["langfuse", "prometheus"]
- langfuse_default_tags: ["cache_hit", "cache_key", "proxy_base_url", "user_api_key_alias", "user_api_key_user_id", "user_api_key_user_email", "user_api_key_team_alias", "semantic-similarity", "proxy_base_url"]
- failure_callback: ["prometheus"]
- cache: True
+ callbacks: ["gcs_bucket"]
diff --git a/litellm/proxy/spend_tracking/spend_tracking_utils.py b/litellm/proxy/spend_tracking/spend_tracking_utils.py
index cd7004e41d..6a28d70b17 100644
--- a/litellm/proxy/spend_tracking/spend_tracking_utils.py
+++ b/litellm/proxy/spend_tracking/spend_tracking_utils.py
@@ -21,6 +21,8 @@ def get_logging_payload(
if kwargs is None:
kwargs = {}
+ if response_obj is None:
+ response_obj = {}
# standardize this function to be used across, s3, dynamoDB, langfuse logging
litellm_params = kwargs.get("litellm_params", {})
metadata = (
diff --git a/litellm/router_utils/client_initalization_utils.py b/litellm/router_utils/client_initalization_utils.py
index 073a87901a..f396defb51 100644
--- a/litellm/router_utils/client_initalization_utils.py
+++ b/litellm/router_utils/client_initalization_utils.py
@@ -190,7 +190,7 @@ def set_client(litellm_router_instance: LitellmRouter, model: dict):
if azure_ad_token.startswith("oidc/"):
azure_ad_token = get_azure_ad_token_from_oidc(azure_ad_token)
if api_version is None:
- api_version = litellm.AZURE_DEFAULT_API_VERSION
+ api_version = os.getenv("AZURE_API_VERSION", litellm.AZURE_DEFAULT_API_VERSION)
if "gateway.ai.cloudflare.com" in api_base:
if not api_base.endswith("/"):
diff --git a/litellm/tests/test_anthropic_prompt_caching.py b/litellm/tests/test_anthropic_prompt_caching.py
new file mode 100644
index 0000000000..87bfc23f84
--- /dev/null
+++ b/litellm/tests/test_anthropic_prompt_caching.py
@@ -0,0 +1,321 @@
+import json
+import os
+import sys
+import traceback
+
+from dotenv import load_dotenv
+
+load_dotenv()
+import io
+import os
+
+sys.path.insert(
+ 0, os.path.abspath("../..")
+) # Adds the parent directory to the system path
+
+import os
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+import litellm
+from litellm import RateLimitError, Timeout, completion, completion_cost, embedding
+from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
+from litellm.llms.prompt_templates.factory import anthropic_messages_pt
+
+# litellm.num_retries =3
+litellm.cache = None
+litellm.success_callback = []
+user_message = "Write a short poem about the sky"
+messages = [{"content": user_message, "role": "user"}]
+
+
+def logger_fn(user_model_dict):
+ print(f"user_model_dict: {user_model_dict}")
+
+
+@pytest.fixture(autouse=True)
+def reset_callbacks():
+ print("\npytest fixture - resetting callbacks")
+ litellm.success_callback = []
+ litellm._async_success_callback = []
+ litellm.failure_callback = []
+ litellm.callbacks = []
+
+
+@pytest.mark.asyncio
+async def test_litellm_anthropic_prompt_caching_tools():
+ # Arrange: Set up the MagicMock for the httpx.AsyncClient
+ mock_response = AsyncMock()
+
+ def return_val():
+ return {
+ "id": "msg_01XFDUDYJgAACzvnptvVoYEL",
+ "type": "message",
+ "role": "assistant",
+ "content": [{"type": "text", "text": "Hello!"}],
+ "model": "claude-3-5-sonnet-20240620",
+ "stop_reason": "end_turn",
+ "stop_sequence": None,
+ "usage": {"input_tokens": 12, "output_tokens": 6},
+ }
+
+ mock_response.json = return_val
+
+ litellm.set_verbose = True
+ with patch(
+ "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post",
+ return_value=mock_response,
+ ) as mock_post:
+ # Act: Call the litellm.acompletion function
+ response = await litellm.acompletion(
+ api_key="mock_api_key",
+ model="anthropic/claude-3-5-sonnet-20240620",
+ messages=[
+ {"role": "user", "content": "What's the weather like in Boston today?"}
+ ],
+ tools=[
+ {
+ "type": "function",
+ "function": {
+ "name": "get_current_weather",
+ "description": "Get the current weather in a given location",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "location": {
+ "type": "string",
+ "description": "The city and state, e.g. San Francisco, CA",
+ },
+ "unit": {
+ "type": "string",
+ "enum": ["celsius", "fahrenheit"],
+ },
+ },
+ "required": ["location"],
+ },
+ "cache_control": {"type": "ephemeral"},
+ },
+ }
+ ],
+ extra_headers={
+ "anthropic-version": "2023-06-01",
+ "anthropic-beta": "prompt-caching-2024-07-31",
+ },
+ )
+
+ # Print what was called on the mock
+ print("call args=", mock_post.call_args)
+
+ expected_url = "https://api.anthropic.com/v1/messages"
+ expected_headers = {
+ "accept": "application/json",
+ "content-type": "application/json",
+ "anthropic-version": "2023-06-01",
+ "anthropic-beta": "prompt-caching-2024-07-31",
+ "x-api-key": "mock_api_key",
+ }
+
+ expected_json = {
+ "messages": [
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": "What's the weather like in Boston today?",
+ }
+ ],
+ }
+ ],
+ "tools": [
+ {
+ "name": "get_current_weather",
+ "description": "Get the current weather in a given location",
+ "cache_control": {"type": "ephemeral"},
+ "input_schema": {
+ "type": "object",
+ "properties": {
+ "location": {
+ "type": "string",
+ "description": "The city and state, e.g. San Francisco, CA",
+ },
+ "unit": {
+ "type": "string",
+ "enum": ["celsius", "fahrenheit"],
+ },
+ },
+ "required": ["location"],
+ },
+ }
+ ],
+ "max_tokens": 4096,
+ "model": "claude-3-5-sonnet-20240620",
+ }
+
+ mock_post.assert_called_once_with(
+ expected_url, json=expected_json, headers=expected_headers, timeout=600.0
+ )
+
+
+@pytest.mark.asyncio()
+async def test_anthropic_api_prompt_caching_basic():
+ litellm.set_verbose = True
+ response = await litellm.acompletion(
+ model="anthropic/claude-3-5-sonnet-20240620",
+ messages=[
+ # System Message
+ {
+ "role": "system",
+ "content": [
+ {
+ "type": "text",
+ "text": "Here is the full text of a complex legal agreement"
+ * 400,
+ "cache_control": {"type": "ephemeral"},
+ }
+ ],
+ },
+ # marked for caching with the cache_control parameter, so that this checkpoint can read from the previous cache.
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": "What are the key terms and conditions in this agreement?",
+ "cache_control": {"type": "ephemeral"},
+ }
+ ],
+ },
+ {
+ "role": "assistant",
+ "content": "Certainly! the key terms and conditions are the following: the contract is 1 year long for $10/mo",
+ },
+ # The final turn is marked with cache-control, for continuing in followups.
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": "What are the key terms and conditions in this agreement?",
+ "cache_control": {"type": "ephemeral"},
+ }
+ ],
+ },
+ ],
+ temperature=0.2,
+ max_tokens=10,
+ extra_headers={
+ "anthropic-version": "2023-06-01",
+ "anthropic-beta": "prompt-caching-2024-07-31",
+ },
+ )
+
+ print("response=", response)
+
+ assert "cache_read_input_tokens" in response.usage
+ assert "cache_creation_input_tokens" in response.usage
+
+ # Assert either a cache entry was created or cache was read - changes depending on the anthropic api ttl
+ assert (response.usage.cache_read_input_tokens > 0) or (
+ response.usage.cache_creation_input_tokens > 0
+ )
+
+
+@pytest.mark.asyncio
+async def test_litellm_anthropic_prompt_caching_system():
+ # https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching#prompt-caching-examples
+    # Large Context Caching Example
+ mock_response = AsyncMock()
+
+ def return_val():
+ return {
+ "id": "msg_01XFDUDYJgAACzvnptvVoYEL",
+ "type": "message",
+ "role": "assistant",
+ "content": [{"type": "text", "text": "Hello!"}],
+ "model": "claude-3-5-sonnet-20240620",
+ "stop_reason": "end_turn",
+ "stop_sequence": None,
+ "usage": {"input_tokens": 12, "output_tokens": 6},
+ }
+
+ mock_response.json = return_val
+
+ litellm.set_verbose = True
+ with patch(
+ "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post",
+ return_value=mock_response,
+ ) as mock_post:
+ # Act: Call the litellm.acompletion function
+ response = await litellm.acompletion(
+ api_key="mock_api_key",
+ model="anthropic/claude-3-5-sonnet-20240620",
+ messages=[
+ {
+ "role": "system",
+ "content": [
+ {
+ "type": "text",
+ "text": "You are an AI assistant tasked with analyzing legal documents.",
+ },
+ {
+ "type": "text",
+ "text": "Here is the full text of a complex legal agreement",
+ "cache_control": {"type": "ephemeral"},
+ },
+ ],
+ },
+ {
+ "role": "user",
+ "content": "what are the key terms and conditions in this agreement?",
+ },
+ ],
+ extra_headers={
+ "anthropic-version": "2023-06-01",
+ "anthropic-beta": "prompt-caching-2024-07-31",
+ },
+ )
+
+ # Print what was called on the mock
+ print("call args=", mock_post.call_args)
+
+ expected_url = "https://api.anthropic.com/v1/messages"
+ expected_headers = {
+ "accept": "application/json",
+ "content-type": "application/json",
+ "anthropic-version": "2023-06-01",
+ "anthropic-beta": "prompt-caching-2024-07-31",
+ "x-api-key": "mock_api_key",
+ }
+
+ expected_json = {
+ "system": [
+ {
+ "type": "text",
+ "text": "You are an AI assistant tasked with analyzing legal documents.",
+ },
+ {
+ "type": "text",
+ "text": "Here is the full text of a complex legal agreement",
+ "cache_control": {"type": "ephemeral"},
+ },
+ ],
+ "messages": [
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": "what are the key terms and conditions in this agreement?",
+ }
+ ],
+ }
+ ],
+ "max_tokens": 4096,
+ "model": "claude-3-5-sonnet-20240620",
+ }
+
+ mock_post.assert_called_once_with(
+ expected_url, json=expected_json, headers=expected_headers, timeout=600.0
+ )
diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index 033b4431fa..cc1b24cde1 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -14,7 +14,7 @@ sys.path.insert(
) # Adds the parent directory to the system path
import os
-from unittest.mock import MagicMock, patch
+from unittest.mock import AsyncMock, MagicMock, patch
import pytest
@@ -3474,7 +3474,6 @@ def response_format_tests(response: litellm.ModelResponse):
assert isinstance(response.usage.total_tokens, int) # type: ignore
-@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.parametrize(
"model",
[
@@ -3488,6 +3487,7 @@ def response_format_tests(response: litellm.ModelResponse):
"cohere.command-text-v14",
],
)
+@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_completion_bedrock_httpx_models(sync_mode, model):
litellm.set_verbose = True
@@ -3730,19 +3730,21 @@ def test_completion_anyscale_api():
# test_completion_anyscale_api()
-@pytest.mark.skip(reason="flaky test, times out frequently")
+# @pytest.mark.skip(reason="flaky test, times out frequently")
def test_completion_cohere():
try:
# litellm.set_verbose=True
messages = [
{"role": "system", "content": "You're a good bot"},
+ {"role": "assistant", "content": [{"text": "2", "type": "text"}]},
+ {"role": "assistant", "content": [{"text": "3", "type": "text"}]},
{
"role": "user",
"content": "Hey",
},
]
response = completion(
- model="command-nightly",
+ model="command-r",
messages=messages,
)
print(response)
diff --git a/litellm/tests/test_function_call_parsing.py b/litellm/tests/test_function_call_parsing.py
index d223a7c8f6..fab9cf110c 100644
--- a/litellm/tests/test_function_call_parsing.py
+++ b/litellm/tests/test_function_call_parsing.py
@@ -1,23 +1,27 @@
# What is this?
## Test to make sure function call response always works with json.loads() -> no extra parsing required. Relevant issue - https://github.com/BerriAI/litellm/issues/2654
-import sys, os
+import os
+import sys
import traceback
+
from dotenv import load_dotenv
load_dotenv()
-import os, io
+import io
+import os
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
-import pytest
-import litellm
import json
import warnings
-
-from litellm import completion
from typing import List
+import pytest
+
+import litellm
+from litellm import completion
+
# Just a stub to keep the sample code simple
class Trade:
@@ -78,58 +82,60 @@ def trade(model_name: str) -> List[Trade]:
},
}
- response = completion(
- model_name,
- [
- {
- "role": "system",
- "content": """You are an expert asset manager, managing a portfolio.
+ try:
+ response = completion(
+ model_name,
+ [
+ {
+ "role": "system",
+ "content": """You are an expert asset manager, managing a portfolio.
- Always use the `trade` function. Make sure that you call it correctly. For example, the following is a valid call:
+ Always use the `trade` function. Make sure that you call it correctly. For example, the following is a valid call:
+ ```
+ trade({
+ "orders": [
+ {"action": "buy", "asset": "BTC", "amount": 0.1},
+ {"action": "sell", "asset": "ETH", "amount": 0.2}
+ ]
+ })
+ ```
+
+ If there are no trades to make, call `trade` with an empty array:
+ ```
+ trade({ "orders": [] })
+ ```
+ """,
+ },
+ {
+ "role": "user",
+ "content": """Manage the portfolio.
+
+ Don't jabber.
+
+ This is the current market data:
```
- trade({
- "orders": [
- {"action": "buy", "asset": "BTC", "amount": 0.1},
- {"action": "sell", "asset": "ETH", "amount": 0.2}
- ]
- })
+ {market_data}
```
- If there are no trades to make, call `trade` with an empty array:
+ Your portfolio is as follows:
```
- trade({ "orders": [] })
+ {portfolio}
```
- """,
+ """.replace(
+ "{market_data}", "BTC: 64,000 USD\nETH: 3,500 USD"
+ ).replace(
+ "{portfolio}", "USD: 1000, BTC: 0.1, ETH: 0.2"
+ ),
+ },
+ ],
+ tools=[tool_spec],
+ tool_choice={
+ "type": "function",
+ "function": {"name": tool_spec["function"]["name"]}, # type: ignore
},
- {
- "role": "user",
- "content": """Manage the portfolio.
-
- Don't jabber.
-
- This is the current market data:
- ```
- {market_data}
- ```
-
- Your portfolio is as follows:
- ```
- {portfolio}
- ```
- """.replace(
- "{market_data}", "BTC: 64,000 USD\nETH: 3,500 USD"
- ).replace(
- "{portfolio}", "USD: 1000, BTC: 0.1, ETH: 0.2"
- ),
- },
- ],
- tools=[tool_spec],
- tool_choice={
- "type": "function",
- "function": {"name": tool_spec["function"]["name"]}, # type: ignore
- },
- )
-
+ )
+ except litellm.InternalServerError:
+ pass
calls = response.choices[0].message.tool_calls
trades = [trade for call in calls for trade in parse_call(call)]
return trades
diff --git a/litellm/tests/test_gcs_bucket.py b/litellm/tests/test_gcs_bucket.py
index c21988c73d..f0aaf8d8dd 100644
--- a/litellm/tests/test_gcs_bucket.py
+++ b/litellm/tests/test_gcs_bucket.py
@@ -147,6 +147,117 @@ async def test_basic_gcs_logger():
assert gcs_payload["response_cost"] > 0.0
+ assert gcs_payload["log_event_type"] == "successful_api_call"
+ gcs_payload["spend_log_metadata"] = json.loads(gcs_payload["spend_log_metadata"])
+
+ assert (
+ gcs_payload["spend_log_metadata"]["user_api_key"]
+ == "88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b"
+ )
+ assert (
+ gcs_payload["spend_log_metadata"]["user_api_key_user_id"]
+ == "116544810872468347480"
+ )
+
+ # Delete Object from GCS
+ print("deleting object from GCS")
+ await gcs_logger.delete_gcs_object(object_name=object_name)
+
+
+@pytest.mark.asyncio
+async def test_basic_gcs_logger_failure():
+ load_vertex_ai_credentials()
+ gcs_logger = GCSBucketLogger()
+ print("GCSBucketLogger", gcs_logger)
+
+ gcs_log_id = f"failure-test-{uuid.uuid4().hex}"
+
+ litellm.callbacks = [gcs_logger]
+
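+    # Trigger a mocked BadRequestError so the failure-callback path writes a log object to GCS.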
+ try:
+ response = await litellm.acompletion(
+ model="gpt-3.5-turbo",
+ temperature=0.7,
+ messages=[{"role": "user", "content": "This is a test"}],
+ max_tokens=10,
+ user="ishaan-2",
+ mock_response=litellm.BadRequestError(
+ model="gpt-3.5-turbo",
+ message="Error: 400: Bad Request: Invalid API key, please check your API key and try again.",
+ llm_provider="openai",
+ ),
+ metadata={
+ "gcs_log_id": gcs_log_id,
+ "tags": ["model-anthropic-claude-v2.1", "app-ishaan-prod"],
+ "user_api_key": "88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",
+ "user_api_key_alias": None,
+ "user_api_end_user_max_budget": None,
+ "litellm_api_version": "0.0.0",
+ "global_max_parallel_requests": None,
+ "user_api_key_user_id": "116544810872468347480",
+ "user_api_key_org_id": None,
+ "user_api_key_team_id": None,
+ "user_api_key_team_alias": None,
+ "user_api_key_metadata": {},
+ "requester_ip_address": "127.0.0.1",
+ "spend_logs_metadata": {"hello": "world"},
+ "headers": {
+ "content-type": "application/json",
+ "user-agent": "PostmanRuntime/7.32.3",
+ "accept": "*/*",
+ "postman-token": "92300061-eeaa-423b-a420-0b44896ecdc4",
+ "host": "localhost:4000",
+ "accept-encoding": "gzip, deflate, br",
+ "connection": "keep-alive",
+ "content-length": "163",
+ },
+ "endpoint": "http://localhost:4000/chat/completions",
+ "model_group": "gpt-3.5-turbo",
+ "deployment": "azure/chatgpt-v-2",
+ "model_info": {
+ "id": "4bad40a1eb6bebd1682800f16f44b9f06c52a6703444c99c7f9f32e9de3693b4",
+ "db_model": False,
+ },
+ "api_base": "https://openai-gpt-4-test-v-1.openai.azure.com/",
+ "caching_groups": None,
+ "raw_request": "\n\nPOST Request Sent from LiteLLM:\ncurl -X POST \\\nhttps://openai-gpt-4-test-v-1.openai.azure.com//openai/ \\\n-H 'Authorization: *****' \\\n-d '{'model': 'chatgpt-v-2', 'messages': [{'role': 'system', 'content': 'you are a helpful assistant.\\n'}, {'role': 'user', 'content': 'bom dia'}], 'stream': False, 'max_tokens': 10, 'user': '116544810872468347480', 'extra_body': {}}'\n",
+ },
+ )
+    except Exception:
+ pass
+
+ await asyncio.sleep(5)
+
+ # Get the current date
+ current_date = datetime.now().strftime("%Y-%m-%d")
+
+ # Modify the object_name to include the date-based folder
+ object_name = gcs_log_id
+
+ print("object_name", object_name)
+
+ # Check if object landed on GCS
+ object_from_gcs = await gcs_logger.download_gcs_object(object_name=object_name)
+ print("object from gcs=", object_from_gcs)
+ # convert object_from_gcs from bytes to DICT
+ parsed_data = json.loads(object_from_gcs)
+ print("object_from_gcs as dict", parsed_data)
+
+ print("type of object_from_gcs", type(parsed_data))
+
+ gcs_payload = GCSBucketPayload(**parsed_data)
+
+ print("gcs_payload", gcs_payload)
+
+ assert gcs_payload["request_kwargs"]["model"] == "gpt-3.5-turbo"
+ assert gcs_payload["request_kwargs"]["messages"] == [
+ {"role": "user", "content": "This is a test"}
+ ]
+
+ assert gcs_payload["response_cost"] == 0
+ assert gcs_payload["log_event_type"] == "failed_api_call"
+
gcs_payload["spend_log_metadata"] = json.loads(gcs_payload["spend_log_metadata"])
assert (
diff --git a/litellm/tests/test_pass_through_endpoints.py b/litellm/tests/test_pass_through_endpoints.py
index 4f52f3d192..d78a40d378 100644
--- a/litellm/tests/test_pass_through_endpoints.py
+++ b/litellm/tests/test_pass_through_endpoints.py
@@ -1,5 +1,6 @@
import os
import sys
+from typing import Optional
import pytest
from fastapi import FastAPI
@@ -30,6 +31,7 @@ def client():
async def test_pass_through_endpoint(client, monkeypatch):
# Mock the httpx.AsyncClient.request method
monkeypatch.setattr("httpx.AsyncClient.request", mock_request)
+ import litellm
# Define a pass-through endpoint
pass_through_endpoints = [
@@ -42,6 +44,11 @@ async def test_pass_through_endpoint(client, monkeypatch):
# Initialize the pass-through endpoint
await initialize_pass_through_endpoints(pass_through_endpoints)
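+    # Also register the endpoints in general_settings, which the proxy consults when handling pass-through requests.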
+ general_settings: Optional[dict] = (
+ getattr(litellm.proxy.proxy_server, "general_settings", {}) or {}
+ )
+ general_settings.update({"pass_through_endpoints": pass_through_endpoints})
+ setattr(litellm.proxy.proxy_server, "general_settings", general_settings)
# Make a request to the pass-through endpoint
response = client.post("/test-endpoint", json={"prompt": "Hello, world!"})
@@ -54,6 +61,7 @@ async def test_pass_through_endpoint(client, monkeypatch):
@pytest.mark.asyncio
async def test_pass_through_endpoint_rerank(client):
_cohere_api_key = os.environ.get("COHERE_API_KEY")
+ import litellm
# Define a pass-through endpoint
pass_through_endpoints = [
@@ -66,6 +74,11 @@ async def test_pass_through_endpoint_rerank(client):
# Initialize the pass-through endpoint
await initialize_pass_through_endpoints(pass_through_endpoints)
+ general_settings: Optional[dict] = (
+ getattr(litellm.proxy.proxy_server, "general_settings", {}) or {}
+ )
+ general_settings.update({"pass_through_endpoints": pass_through_endpoints})
+ setattr(litellm.proxy.proxy_server, "general_settings", general_settings)
_json_data = {
"model": "rerank-english-v3.0",
@@ -87,7 +100,7 @@ async def test_pass_through_endpoint_rerank(client):
@pytest.mark.parametrize(
"auth, rpm_limit, expected_error_code",
- [(True, 0, 429), (True, 1, 200), (False, 0, 401)],
+ [(True, 0, 429), (True, 1, 200), (False, 0, 200)],
)
@pytest.mark.asyncio
async def test_pass_through_endpoint_rpm_limit(auth, expected_error_code, rpm_limit):
@@ -123,6 +136,11 @@ async def test_pass_through_endpoint_rpm_limit(auth, expected_error_code, rpm_li
# Initialize the pass-through endpoint
await initialize_pass_through_endpoints(pass_through_endpoints)
+ general_settings: Optional[dict] = (
+ getattr(litellm.proxy.proxy_server, "general_settings", {}) or {}
+ )
+ general_settings.update({"pass_through_endpoints": pass_through_endpoints})
+ setattr(litellm.proxy.proxy_server, "general_settings", general_settings)
_json_data = {
"model": "rerank-english-v3.0",
@@ -146,6 +164,123 @@ async def test_pass_through_endpoint_rpm_limit(auth, expected_error_code, rpm_li
assert response.status_code == expected_error_code
+@pytest.mark.parametrize(
+ "auth, rpm_limit, expected_error_code",
+ [(True, 0, 429), (True, 1, 207), (False, 0, 207)],
+)
+@pytest.mark.asyncio
+async def test_aaapass_through_endpoint_pass_through_keys_langfuse(
+ auth, expected_error_code, rpm_limit
+):
+
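+    # Langfuse's batch ingestion endpoint returns 207 Multi-Status on success, hence 207 as the expected "success" code above.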
+ client = TestClient(app)
+ import litellm
+ from litellm.proxy._types import UserAPIKeyAuth
+ from litellm.proxy.proxy_server import ProxyLogging, hash_token, user_api_key_cache
+
+ # Store original values
+ original_user_api_key_cache = getattr(
+ litellm.proxy.proxy_server, "user_api_key_cache", None
+ )
+ original_master_key = getattr(litellm.proxy.proxy_server, "master_key", None)
+ original_prisma_client = getattr(litellm.proxy.proxy_server, "prisma_client", None)
+ original_proxy_logging_obj = getattr(
+ litellm.proxy.proxy_server, "proxy_logging_obj", None
+ )
+
+ try:
+
+ mock_api_key = "sk-my-test-key"
+ cache_value = UserAPIKeyAuth(
+ token=hash_token(mock_api_key), rpm_limit=rpm_limit
+ )
+
+ _cohere_api_key = os.environ.get("COHERE_API_KEY")
+
+ user_api_key_cache.set_cache(key=hash_token(mock_api_key), value=cache_value)
+
+ proxy_logging_obj = ProxyLogging(user_api_key_cache=user_api_key_cache)
+ proxy_logging_obj._init_litellm_callbacks()
+
+ setattr(litellm.proxy.proxy_server, "user_api_key_cache", user_api_key_cache)
+ setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
+ setattr(litellm.proxy.proxy_server, "prisma_client", "FAKE-VAR")
+ setattr(litellm.proxy.proxy_server, "proxy_logging_obj", proxy_logging_obj)
+
+ # Define a pass-through endpoint
+ pass_through_endpoints = [
+ {
+ "path": "/api/public/ingestion",
+ "target": "https://cloud.langfuse.com/api/public/ingestion",
+ "auth": auth,
+ "custom_auth_parser": "langfuse",
+ "headers": {
+ "LANGFUSE_PUBLIC_KEY": "os.environ/LANGFUSE_PUBLIC_KEY",
+ "LANGFUSE_SECRET_KEY": "os.environ/LANGFUSE_SECRET_KEY",
+ },
+ }
+ ]
+
+ # Initialize the pass-through endpoint
+ await initialize_pass_through_endpoints(pass_through_endpoints)
+ general_settings: Optional[dict] = (
+ getattr(litellm.proxy.proxy_server, "general_settings", {}) or {}
+ )
+ old_general_settings = general_settings
+ general_settings.update({"pass_through_endpoints": pass_through_endpoints})
+ setattr(litellm.proxy.proxy_server, "general_settings", general_settings)
+
+ _json_data = {
+ "batch": [
+ {
+ "id": "80e2141f-0ca6-47b7-9c06-dde5e97de690",
+ "type": "trace-create",
+ "body": {
+ "id": "0687af7b-4a75-4de8-a4f6-cba1cdc00865",
+ "timestamp": "2024-08-14T02:38:56.092950Z",
+ "name": "test-trace-litellm-proxy-passthrough",
+ },
+ "timestamp": "2024-08-14T02:38:56.093352Z",
+ }
+ ],
+ "metadata": {
+ "batch_size": 1,
+ "sdk_integration": "default",
+ "sdk_name": "python",
+ "sdk_version": "2.27.0",
+ "public_key": "anything",
+ },
+ }
+
+ # Make a request to the pass-through endpoint
+ response = client.post(
+ "/api/public/ingestion",
+ json=_json_data,
+ headers={"Authorization": "Basic c2stbXktdGVzdC1rZXk6YW55dGhpbmc="},
+ )
+
+ print("JSON response: ", _json_data)
+
+ print("RESPONSE RECEIVED - {}".format(response.text))
+
+ # Assert the response
+ assert response.status_code == expected_error_code
+
+ setattr(litellm.proxy.proxy_server, "general_settings", old_general_settings)
+ finally:
+ # Reset to original values
+ setattr(
+ litellm.proxy.proxy_server,
+ "user_api_key_cache",
+ original_user_api_key_cache,
+ )
+ setattr(litellm.proxy.proxy_server, "master_key", original_master_key)
+ setattr(litellm.proxy.proxy_server, "prisma_client", original_prisma_client)
+ setattr(
+ litellm.proxy.proxy_server, "proxy_logging_obj", original_proxy_logging_obj
+ )
+
+
@pytest.mark.asyncio
async def test_pass_through_endpoint_anthropic(client):
import litellm
@@ -178,6 +313,11 @@ async def test_pass_through_endpoint_anthropic(client):
# Initialize the pass-through endpoint
await initialize_pass_through_endpoints(pass_through_endpoints)
+ general_settings: Optional[dict] = (
+ getattr(litellm.proxy.proxy_server, "general_settings", {}) or {}
+ )
+ general_settings.update({"pass_through_endpoints": pass_through_endpoints})
+ setattr(litellm.proxy.proxy_server, "general_settings", general_settings)
_json_data = {
"model": "gpt-3.5-turbo",
diff --git a/litellm/tests/test_prometheus.py b/litellm/tests/test_prometheus.py
index 64e824e6db..7574beb9d9 100644
--- a/litellm/tests/test_prometheus.py
+++ b/litellm/tests/test_prometheus.py
@@ -76,6 +76,6 @@ async def test_async_prometheus_success_logging():
print("metrics from prometheus", metrics)
assert metrics["litellm_requests_metric_total"] == 1.0
assert metrics["litellm_total_tokens_total"] == 30.0
- assert metrics["llm_deployment_success_responses_total"] == 1.0
- assert metrics["llm_deployment_total_requests_total"] == 1.0
- assert metrics["llm_deployment_latency_per_output_token_bucket"] == 1.0
+ assert metrics["litellm_deployment_success_responses_total"] == 1.0
+ assert metrics["litellm_deployment_total_requests_total"] == 1.0
+ assert metrics["litellm_deployment_latency_per_output_token_bucket"] == 1.0
diff --git a/litellm/tests/test_prompt_factory.py b/litellm/tests/test_prompt_factory.py
index f7a715a220..93e92a7926 100644
--- a/litellm/tests/test_prompt_factory.py
+++ b/litellm/tests/test_prompt_factory.py
@@ -260,3 +260,56 @@ def test_anthropic_messages_tool_call():
translated_messages[-1]["content"][0]["tool_use_id"]
== "bc8cb4b6-88c4-4138-8993-3a9d9cd51656"
)
+
+
+def test_anthropic_cache_controls_pt():
+ "see anthropic docs for this: https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching#continuing-a-multi-turn-conversation"
+ messages = [
+ # marked for caching with the cache_control parameter, so that this checkpoint can read from the previous cache.
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": "What are the key terms and conditions in this agreement?",
+ "cache_control": {"type": "ephemeral"},
+ }
+ ],
+ },
+ {
+ "role": "assistant",
+ "content": "Certainly! the key terms and conditions are the following: the contract is 1 year long for $10/mo",
+ },
+        # The final turn is marked with cache_control, for continuing in follow-ups.
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": "What are the key terms and conditions in this agreement?",
+ "cache_control": {"type": "ephemeral"},
+ }
+ ],
+ },
+ {
+ "role": "assistant",
+ "content": "Certainly! the key terms and conditions are the following: the contract is 1 year long for $10/mo",
+ "cache_control": {"type": "ephemeral"},
+ },
+ ]
+
+ translated_messages = anthropic_messages_pt(
+ messages, model="claude-3-5-sonnet-20240620", llm_provider="anthropic"
+ )
+
+ for i, msg in enumerate(translated_messages):
+ if i == 0:
+ assert msg["content"][0]["cache_control"] == {"type": "ephemeral"}
+ elif i == 1:
+ assert "cache_controls" not in msg["content"][0]
+ elif i == 2:
+ assert msg["content"][0]["cache_control"] == {"type": "ephemeral"}
+ elif i == 3:
+ assert msg["content"][0]["cache_control"] == {"type": "ephemeral"}
+
+ print("translated_messages: ", translated_messages)
diff --git a/litellm/tests/test_proxy_server.py b/litellm/tests/test_proxy_server.py
index 757eef6d62..9a1c091267 100644
--- a/litellm/tests/test_proxy_server.py
+++ b/litellm/tests/test_proxy_server.py
@@ -966,3 +966,203 @@ async def test_user_info_team_list(prisma_client):
pass
mock_client.assert_called()
+
+
+@pytest.mark.skip(reason="Local test")
+@pytest.mark.asyncio
+async def test_add_callback_via_key(prisma_client):
+ """
+ Test if callback specified in key, is used.
+ """
+ global headers
+ import json
+
+ from fastapi import HTTPException, Request, Response
+ from starlette.datastructures import URL
+
+ from litellm.proxy.proxy_server import chat_completion
+
+ setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
+ setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
+ await litellm.proxy.proxy_server.prisma_client.connect()
+
+ litellm.set_verbose = True
+
+ try:
+ # Your test data
+ test_data = {
+ "model": "azure/chatgpt-v-2",
+ "messages": [
+ {"role": "user", "content": "write 1 sentence poem"},
+ ],
+ "max_tokens": 10,
+ "mock_response": "Hello world",
+ "api_key": "my-fake-key",
+ }
+
+ request = Request(scope={"type": "http", "method": "POST", "headers": {}})
+ request._url = URL(url="/chat/completions")
+
+ json_bytes = json.dumps(test_data).encode("utf-8")
+
+ request._body = json_bytes
+
+ with patch.object(
+ litellm.litellm_core_utils.litellm_logging,
+ "LangFuseLogger",
+ new=MagicMock(),
+ ) as mock_client:
+ resp = await chat_completion(
+ request=request,
+ fastapi_response=Response(),
+ user_api_key_dict=UserAPIKeyAuth(
+ metadata={
+ "logging": [
+ {
+ "callback_name": "langfuse", # 'otel', 'langfuse', 'lunary'
+ "callback_type": "success", # set, if required by integration - future improvement, have logging tools work for success + failure by default
+ "callback_vars": {
+ "langfuse_public_key": "os.environ/LANGFUSE_PUBLIC_KEY",
+ "langfuse_secret_key": "os.environ/LANGFUSE_SECRET_KEY",
+ "langfuse_host": "https://us.cloud.langfuse.com",
+ },
+ }
+ ]
+ }
+ ),
+ )
+ print(resp)
+ mock_client.assert_called()
+ mock_client.return_value.log_event.assert_called()
+ args, kwargs = mock_client.return_value.log_event.call_args
+ kwargs = kwargs["kwargs"]
+ assert "user_api_key_metadata" in kwargs["litellm_params"]["metadata"]
+ assert (
+ "logging"
+ in kwargs["litellm_params"]["metadata"]["user_api_key_metadata"]
+ )
+ checked_keys = False
+ for item in kwargs["litellm_params"]["metadata"]["user_api_key_metadata"][
+ "logging"
+ ]:
+ for k, v in item["callback_vars"].items():
+ print("k={}, v={}".format(k, v))
+ if "key" in k:
+ assert "os.environ" in v
+ checked_keys = True
+
+ assert checked_keys
+ except Exception as e:
+ pytest.fail(f"LiteLLM Proxy test failed. Exception - {str(e)}")
+
+
+@pytest.mark.asyncio
+async def test_add_callback_via_key_litellm_pre_call_utils(prisma_client):
+ import json
+
+ from fastapi import HTTPException, Request, Response
+ from starlette.datastructures import URL
+
+ from litellm.proxy.litellm_pre_call_utils import add_litellm_data_to_request
+
+ setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
+ setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
+ await litellm.proxy.proxy_server.prisma_client.connect()
+
+ proxy_config = getattr(litellm.proxy.proxy_server, "proxy_config")
+
+ request = Request(scope={"type": "http", "method": "POST", "headers": {}})
+ request._url = URL(url="/chat/completions")
+
+ test_data = {
+ "model": "azure/chatgpt-v-2",
+ "messages": [
+ {"role": "user", "content": "write 1 sentence poem"},
+ ],
+ "max_tokens": 10,
+ "mock_response": "Hello world",
+ "api_key": "my-fake-key",
+ }
+
+ json_bytes = json.dumps(test_data).encode("utf-8")
+
+ request._body = json_bytes
+
+ data = {
+ "data": {
+ "model": "azure/chatgpt-v-2",
+ "messages": [{"role": "user", "content": "write 1 sentence poem"}],
+ "max_tokens": 10,
+ "mock_response": "Hello world",
+ "api_key": "my-fake-key",
+ },
+ "request": request,
+ "user_api_key_dict": UserAPIKeyAuth(
+ token=None,
+ key_name=None,
+ key_alias=None,
+ spend=0.0,
+ max_budget=None,
+ expires=None,
+ models=[],
+ aliases={},
+ config={},
+ user_id=None,
+ team_id=None,
+ max_parallel_requests=None,
+ metadata={
+ "logging": [
+ {
+ "callback_name": "langfuse",
+ "callback_type": "success",
+ "callback_vars": {
+ "langfuse_public_key": "os.environ/LANGFUSE_PUBLIC_KEY",
+ "langfuse_secret_key": "os.environ/LANGFUSE_SECRET_KEY",
+ "langfuse_host": "https://us.cloud.langfuse.com",
+ },
+ }
+ ]
+ },
+ tpm_limit=None,
+ rpm_limit=None,
+ budget_duration=None,
+ budget_reset_at=None,
+ allowed_cache_controls=[],
+ permissions={},
+ model_spend={},
+ model_max_budget={},
+ soft_budget_cooldown=False,
+ litellm_budget_table=None,
+ org_id=None,
+ team_spend=None,
+ team_alias=None,
+ team_tpm_limit=None,
+ team_rpm_limit=None,
+ team_max_budget=None,
+ team_models=[],
+ team_blocked=False,
+ soft_budget=None,
+ team_model_aliases=None,
+ team_member_spend=None,
+ team_metadata=None,
+ end_user_id=None,
+ end_user_tpm_limit=None,
+ end_user_rpm_limit=None,
+ end_user_max_budget=None,
+ last_refreshed_at=None,
+ api_key=None,
+ user_role=None,
+ allowed_model_region=None,
+ parent_otel_span=None,
+ ),
+ "proxy_config": proxy_config,
+ "general_settings": {},
+ "version": "0.0.0",
+ }
+
+ new_data = await add_litellm_data_to_request(**data)
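+    # The key-level "logging" metadata should be expanded into request-level callback settings (success_callback + langfuse credentials).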
+
+ assert "success_callback" in new_data
+ assert new_data["success_callback"] == ["langfuse"]
+ assert "langfuse_public_key" in new_data
+ assert "langfuse_secret_key" in new_data
diff --git a/litellm/types/llms/anthropic.py b/litellm/types/llms/anthropic.py
index 36bcb6cc73..f14aa20c73 100644
--- a/litellm/types/llms/anthropic.py
+++ b/litellm/types/llms/anthropic.py
@@ -15,9 +15,10 @@ class AnthropicMessagesTool(TypedDict, total=False):
input_schema: Required[dict]
-class AnthropicMessagesTextParam(TypedDict):
+class AnthropicMessagesTextParam(TypedDict, total=False):
type: Literal["text"]
text: str
+ cache_control: Optional[dict]
class AnthropicMessagesToolUseParam(TypedDict):
@@ -54,9 +55,10 @@ class AnthropicImageParamSource(TypedDict):
data: str
-class AnthropicMessagesImageParam(TypedDict):
+class AnthropicMessagesImageParam(TypedDict, total=False):
type: Literal["image"]
source: AnthropicImageParamSource
+ cache_control: Optional[dict]
class AnthropicMessagesToolResultContent(TypedDict):
@@ -92,6 +94,12 @@ class AnthropicMetadata(TypedDict, total=False):
user_id: str
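+# A system prompt may be a list of text blocks, each optionally carrying cache_control for prompt caching.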
+class AnthropicSystemMessageContent(TypedDict, total=False):
+ type: str
+ text: str
+ cache_control: Optional[dict]
+
+
class AnthropicMessagesRequest(TypedDict, total=False):
model: Required[str]
messages: Required[
@@ -106,7 +114,7 @@ class AnthropicMessagesRequest(TypedDict, total=False):
metadata: AnthropicMetadata
stop_sequences: List[str]
stream: bool
- system: str
+ system: Union[str, List]
temperature: float
tool_choice: AnthropicMessagesToolChoice
tools: List[AnthropicMessagesTool]
diff --git a/litellm/types/llms/openai.py b/litellm/types/llms/openai.py
index 0d67d5d602..5d2c416f9c 100644
--- a/litellm/types/llms/openai.py
+++ b/litellm/types/llms/openai.py
@@ -361,7 +361,7 @@ class ChatCompletionToolMessage(TypedDict):
class ChatCompletionSystemMessage(TypedDict, total=False):
role: Required[Literal["system"]]
- content: Required[str]
+ content: Required[Union[str, List]]
name: str
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index e31e6b3f4f..d30270c5c8 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -2074,7 +2074,8 @@
"litellm_provider": "vertex_ai-anthropic_models",
"mode": "chat",
"supports_function_calling": true,
- "supports_vision": true
+ "supports_vision": true,
+ "supports_assistant_prefill": true
},
"vertex_ai/claude-3-5-sonnet@20240620": {
"max_tokens": 4096,
@@ -2085,7 +2086,8 @@
"litellm_provider": "vertex_ai-anthropic_models",
"mode": "chat",
"supports_function_calling": true,
- "supports_vision": true
+ "supports_vision": true,
+ "supports_assistant_prefill": true
},
"vertex_ai/claude-3-haiku@20240307": {
"max_tokens": 4096,
@@ -2096,7 +2098,8 @@
"litellm_provider": "vertex_ai-anthropic_models",
"mode": "chat",
"supports_function_calling": true,
- "supports_vision": true
+ "supports_vision": true,
+ "supports_assistant_prefill": true
},
"vertex_ai/claude-3-opus@20240229": {
"max_tokens": 4096,
@@ -2107,7 +2110,8 @@
"litellm_provider": "vertex_ai-anthropic_models",
"mode": "chat",
"supports_function_calling": true,
- "supports_vision": true
+ "supports_vision": true,
+ "supports_assistant_prefill": true
},
"vertex_ai/meta/llama3-405b-instruct-maas": {
"max_tokens": 32000,
@@ -4531,6 +4535,69 @@
"litellm_provider": "perplexity",
"mode": "chat"
},
+ "perplexity/llama-3.1-70b-instruct": {
+ "max_tokens": 131072,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 131072,
+ "input_cost_per_token": 0.000001,
+ "output_cost_per_token": 0.000001,
+ "litellm_provider": "perplexity",
+ "mode": "chat"
+ },
+ "perplexity/llama-3.1-8b-instruct": {
+ "max_tokens": 131072,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 131072,
+ "input_cost_per_token": 0.0000002,
+ "output_cost_per_token": 0.0000002,
+ "litellm_provider": "perplexity",
+ "mode": "chat"
+ },
+ "perplexity/llama-3.1-sonar-huge-128k-online": {
+ "max_tokens": 127072,
+ "max_input_tokens": 127072,
+ "max_output_tokens": 127072,
+ "input_cost_per_token": 0.000005,
+ "output_cost_per_token": 0.000005,
+ "litellm_provider": "perplexity",
+ "mode": "chat"
+ },
+ "perplexity/llama-3.1-sonar-large-128k-online": {
+ "max_tokens": 127072,
+ "max_input_tokens": 127072,
+ "max_output_tokens": 127072,
+ "input_cost_per_token": 0.000001,
+ "output_cost_per_token": 0.000001,
+ "litellm_provider": "perplexity",
+ "mode": "chat"
+ },
+ "perplexity/llama-3.1-sonar-large-128k-chat": {
+ "max_tokens": 131072,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 131072,
+ "input_cost_per_token": 0.000001,
+ "output_cost_per_token": 0.000001,
+ "litellm_provider": "perplexity",
+ "mode": "chat"
+ },
+ "perplexity/llama-3.1-sonar-small-128k-chat": {
+ "max_tokens": 131072,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 131072,
+ "input_cost_per_token": 0.0000002,
+ "output_cost_per_token": 0.0000002,
+ "litellm_provider": "perplexity",
+ "mode": "chat"
+ },
+ "perplexity/llama-3.1-sonar-small-128k-online": {
+ "max_tokens": 127072,
+ "max_input_tokens": 127072,
+ "max_output_tokens": 127072,
+ "input_cost_per_token": 0.0000002,
+ "output_cost_per_token": 0.0000002,
+ "litellm_provider": "perplexity",
+ "mode": "chat"
+ },
"perplexity/pplx-7b-chat": {
"max_tokens": 8192,
"max_input_tokens": 8192,
diff --git a/pyproject.toml b/pyproject.toml
index 5ae04ea924..97703d7088 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
-version = "1.43.10"
+version = "1.43.13"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT"
@@ -91,7 +91,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api"
[tool.commitizen]
-version = "1.43.10"
+version = "1.43.13"
version_files = [
"pyproject.toml:^version"
]