diff --git a/.circleci/config.yml b/.circleci/config.yml index 56fb1fee1..b996dc312 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -807,11 +807,12 @@ jobs: curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash - run: python -c "from litellm import *" || (echo '🚨 import failed, this means you introduced unprotected imports! 🚨'; exit 1) - run: ruff check ./litellm - - run: python ./tests/documentation_tests/test_general_setting_keys.py + # - run: python ./tests/documentation_tests/test_general_setting_keys.py - run: python ./tests/code_coverage_tests/router_code_coverage.py - run: python ./tests/code_coverage_tests/test_router_strategy_async.py - run: python ./tests/code_coverage_tests/litellm_logging_code_coverage.py - run: python ./tests/documentation_tests/test_env_keys.py + - run: python ./tests/documentation_tests/test_router_settings.py - run: python ./tests/documentation_tests/test_api_docs.py - run: python ./tests/code_coverage_tests/ensure_async_clients_test.py - run: helm lint ./deploy/charts/litellm-helm @@ -1407,7 +1408,7 @@ jobs: command: | docker run -d \ -p 4000:4000 \ - -e DATABASE_URL=$PROXY_DATABASE_URL \ + -e DATABASE_URL=$PROXY_DATABASE_URL_2 \ -e LITELLM_MASTER_KEY="sk-1234" \ -e OPENAI_API_KEY=$OPENAI_API_KEY \ -e UI_USERNAME="admin" \ diff --git a/docs/my-website/docs/moderation.md b/docs/my-website/docs/moderation.md new file mode 100644 index 000000000..6dd092fb5 --- /dev/null +++ b/docs/my-website/docs/moderation.md @@ -0,0 +1,135 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# Moderation + + +### Usage + + + +```python +from litellm import moderation + +response = moderation( + input="hello from litellm", + model="text-moderation-stable" +) +``` + + + + +For `/moderations` endpoint, there is **no need to specify `model` in the request or on the litellm config.yaml** + +Start litellm proxy server + +``` +litellm +``` + + + + + +```python +from openai import OpenAI + +# set base_url to your proxy server +# set api_key to send to proxy server +client = OpenAI(api_key="", base_url="http://0.0.0.0:4000") + +response = client.moderations.create( + input="hello from litellm", + model="text-moderation-stable" # optional, defaults to `omni-moderation-latest` +) + +print(response) +``` + + + + +```shell +curl --location 'http://0.0.0.0:4000/moderations' \ + --header 'Content-Type: application/json' \ + --header 'Authorization: Bearer sk-1234' \ + --data '{"input": "Sample text goes here", "model": "text-moderation-stable"}' +``` + + + + + + +## Input Params +LiteLLM accepts and translates the [OpenAI Moderation params](https://platform.openai.com/docs/api-reference/moderations) across all supported providers. + +### Required Fields + +- `input`: *string or array* - Input (or inputs) to classify. Can be a single string, an array of strings, or an array of multi-modal input objects similar to other models. 
+ - If string: A string of text to classify for moderation + - If array of strings: An array of strings to classify for moderation + - If array of objects: An array of multi-modal inputs to the moderation model, where each object can be: + - An object describing an image to classify with: + - `type`: *string, required* - Always `image_url` + - `image_url`: *object, required* - Contains either an image URL or a data URL for a base64 encoded image + - An object describing text to classify with: + - `type`: *string, required* - Always `text` + - `text`: *string, required* - A string of text to classify + +### Optional Fields + +- `model`: *string (optional)* - The moderation model to use. Defaults to `omni-moderation-latest`. + +## Output Format +Here's the exact json output and type you can expect from all moderation calls: + +[**LiteLLM follows OpenAI's output format**](https://platform.openai.com/docs/api-reference/moderations/object) + + +```python +{ + "id": "modr-AB8CjOTu2jiq12hp1AQPfeqFWaORR", + "model": "text-moderation-007", + "results": [ + { + "flagged": true, + "categories": { + "sexual": false, + "hate": false, + "harassment": true, + "self-harm": false, + "sexual/minors": false, + "hate/threatening": false, + "violence/graphic": false, + "self-harm/intent": false, + "self-harm/instructions": false, + "harassment/threatening": true, + "violence": true + }, + "category_scores": { + "sexual": 0.000011726012417057063, + "hate": 0.22706663608551025, + "harassment": 0.5215635299682617, + "self-harm": 2.227119921371923e-6, + "sexual/minors": 7.107352217872176e-8, + "hate/threatening": 0.023547329008579254, + "violence/graphic": 0.00003391829886822961, + "self-harm/intent": 1.646940972932498e-6, + "self-harm/instructions": 1.1198755256458526e-9, + "harassment/threatening": 0.5694745779037476, + "violence": 0.9971134662628174 + } + } + ] +} + +``` + + +## **Supported Providers** + +| Provider | +|-------------| +| OpenAI | diff --git a/docs/my-website/docs/observability/argilla.md b/docs/my-website/docs/observability/argilla.md index 8d20b9daa..dad28ce90 100644 --- a/docs/my-website/docs/observability/argilla.md +++ b/docs/my-website/docs/observability/argilla.md @@ -4,24 +4,63 @@ import TabItem from '@theme/TabItem'; # Argilla -Argilla is a tool for annotating datasets. +Argilla is a collaborative annotation tool for AI engineers and domain experts who need to build high-quality datasets for their projects. +## Getting Started -## Usage +To log the data to Argilla, first you need to deploy the Argilla server. If you have not deployed the Argilla server, please follow the instructions [here](https://docs.argilla.io/latest/getting_started/quickstart/). + +Next, you will need to configure and create the Argilla dataset. + +```python +import argilla as rg + +client = rg.Argilla(api_url="", api_key="") + +settings = rg.Settings( + guidelines="These are some guidelines.", + fields=[ + rg.ChatField( + name="user_input", + ), + rg.TextField( + name="llm_output", + ), + ], + questions=[ + rg.RatingQuestion( + name="rating", + values=[1, 2, 3, 4, 5, 6, 7], + ), + ], +) + +dataset = rg.Dataset( + name="my_first_dataset", + settings=settings, +) + +dataset.create() +``` + +For further configuration, please refer to the [Argilla documentation](https://docs.argilla.io/latest/how_to_guides/dataset/). 
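+
+To sanity-check that the dataset was created before wiring up LiteLLM, you can fetch it back by name. This is a minimal check, assuming the same Argilla 2.x SDK and the `api_url`/`api_key` used above:
+
+```python
+import argilla as rg
+
+# same api_url / api_key as in the snippet above
+client = rg.Argilla(api_url="", api_key="")
+
+# returns the dataset if it exists, None otherwise
+dataset = client.datasets(name="my_first_dataset")
+print(dataset is not None)
+```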
+
+
+## Usage
 
 ```python
-from litellm import completion
+import os
 import litellm
-import os
+from litellm import completion
 
 # add env vars
 os.environ["ARGILLA_API_KEY"]="argilla.apikey"
 os.environ["ARGILLA_BASE_URL"]="http://localhost:6900"
-os.environ["ARGILLA_DATASET_NAME"]="my_second_dataset"
+os.environ["ARGILLA_DATASET_NAME"]="my_first_dataset"
 os.environ["OPENAI_API_KEY"]="sk-proj-..."
 
 litellm.callbacks = ["argilla"]
diff --git a/docs/my-website/docs/pass_through/vertex_ai.md b/docs/my-website/docs/pass_through/vertex_ai.md
index 744c5e3ff..601f89f4b 100644
--- a/docs/my-website/docs/pass_through/vertex_ai.md
+++ b/docs/my-website/docs/pass_through/vertex_ai.md
@@ -69,6 +69,44 @@ generateContent();
 
+## Quick Start
+
+Let's call the Vertex AI [`/generateContent` endpoint](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference)
+
+1. Add Vertex AI Credentials to your environment
+
+```bash
+export DEFAULT_VERTEXAI_PROJECT="" # "adroit-crow-413218"
+export DEFAULT_VERTEXAI_LOCATION="" # "us-central1"
+export DEFAULT_GOOGLE_APPLICATION_CREDENTIALS="" # "/Users/Downloads/adroit-crow-413218-a956eef1a2a8.json"
+```
+
+2. Start LiteLLM Proxy
+
+```bash
+litellm
+
+# RUNNING on http://0.0.0.0:4000
+```
+
+3. Test it!
+
+Send a `generateContent` request through the LiteLLM Proxy
+
+```bash
+curl http://localhost:4000/vertex-ai/publishers/google/models/gemini-1.0-pro:generateContent \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer sk-1234" \
+  -d '{
+    "contents":[{
+      "role": "user",
+      "parts":[{"text": "How are you doing today?"}]
+    }]
+  }'
+```
+
+
 ## Supported API Endpoints
 
 - Gemini API
@@ -87,206 +125,12 @@ LiteLLM Proxy Server supports two methods of authentication to Vertex AI:
 
 2. Set Vertex AI credentials on proxy server
 
-## Quick Start Usage
-
-
-
-#### 1. Start litellm proxy
-
-```shell
-litellm --config /path/to/config.yaml
-```
-
-#### 2. Test it
-
-```python
-import vertexai
-from vertexai.preview.generative_models import GenerativeModel
-
-LITE_LLM_ENDPOINT = "http://localhost:4000"
-
-vertexai.init(
-    project="", # enter your project id
-    location="", # enter your region
-    api_endpoint=f"{LITE_LLM_ENDPOINT}/vertex_ai", # route on litellm
-    api_transport="rest",
-)
-
-model = GenerativeModel(model_name="gemini-1.0-pro")
-model.generate_content("hi")
-
-```
-
-
-
-#### 1. Set `default_vertex_config` on your `config.yaml`
-
-
-Add the following credentials to your litellm config.yaml to use the Vertex AI endpoints.
-
-```yaml
-default_vertex_config:
-  vertex_project: "adroit-crow-413218"
-  vertex_location: "us-central1"
-  vertex_credentials: "/Users/ishaanjaffer/Downloads/adroit-crow-413218-a956eef1a2a8.json" # Add path to service account.json
-```
-
-#### 2. Start litellm proxy
-
-```shell
-litellm --config /path/to/config.yaml
-```
-
-#### 3. 
Test it - -```python -import vertexai -from google.auth.credentials import Credentials -from vertexai.generative_models import GenerativeModel - -LITELLM_PROXY_API_KEY = "sk-1234" -LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai" - -import datetime - - -class CredentialsWrapper(Credentials): - def __init__(self, token=None): - super().__init__() - self.token = token - self.expiry = None # or set to a future date if needed - - def refresh(self, request): - pass - - def apply(self, headers, token=None): - headers["Authorization"] = f"Bearer {self.token}" - - @property - def expired(self): - return False # Always consider the token as non-expired - - @property - def valid(self): - return True # Always consider the credentials as valid - - -credentials = CredentialsWrapper(token=LITELLM_PROXY_API_KEY) - -vertexai.init( - project="adroit-crow-413218", - location="us-central1", - api_endpoint=LITELLM_PROXY_BASE, - credentials=credentials, - api_transport="rest", -) - -model = GenerativeModel("gemini-1.5-flash-001") - -response = model.generate_content( - "What's a good name for a flower shop that specializes in selling bouquets of dried flowers?" -) - -print(response.text) -``` - - - - ## Usage Examples ### Gemini API (Generate Content) - - -```python -import vertexai -from vertexai.generative_models import GenerativeModel - -LITELLM_PROXY_API_KEY = "sk-1234" -LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai" - -vertexai.init( - project="adroit-crow-413218", - location="us-central1", - api_endpoint=LITELLM_PROXY_BASE, - api_transport="rest", - -) - -model = GenerativeModel("gemini-1.5-flash-001") - -response = model.generate_content( - "What's a good name for a flower shop that specializes in selling bouquets of dried flowers?" -) - -print(response.text) -``` - - - - -```python -import vertexai -from google.auth.credentials import Credentials -from vertexai.generative_models import GenerativeModel - -LITELLM_PROXY_API_KEY = "sk-1234" -LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai" - -import datetime - - -class CredentialsWrapper(Credentials): - def __init__(self, token=None): - super().__init__() - self.token = token - self.expiry = None # or set to a future date if needed - - def refresh(self, request): - pass - - def apply(self, headers, token=None): - headers["Authorization"] = f"Bearer {self.token}" - - @property - def expired(self): - return False # Always consider the token as non-expired - - @property - def valid(self): - return True # Always consider the credentials as valid - - -credentials = CredentialsWrapper(token=LITELLM_PROXY_API_KEY) - -vertexai.init( - project="adroit-crow-413218", - location="us-central1", - api_endpoint=LITELLM_PROXY_BASE, - credentials=credentials, - api_transport="rest", - -) - -model = GenerativeModel("gemini-1.5-flash-001") - -response = model.generate_content( - "What's a good name for a flower shop that specializes in selling bouquets of dried flowers?" 
-) - -print(response.text) -``` - - - ```shell curl http://localhost:4000/vertex_ai/publishers/google/models/gemini-1.5-flash-001:generateContent \ @@ -295,114 +139,10 @@ curl http://localhost:4000/vertex_ai/publishers/google/models/gemini-1.5-flash-0 -d '{"contents":[{"role": "user", "parts":[{"text": "hi"}]}]}' ``` - - ### Embeddings API - - - - -```python -from typing import List, Optional -from vertexai.language_models import TextEmbeddingInput, TextEmbeddingModel -import vertexai -from vertexai.generative_models import GenerativeModel - -LITELLM_PROXY_API_KEY = "sk-1234" -LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai" - -import datetime - -vertexai.init( - project="adroit-crow-413218", - location="us-central1", - api_endpoint=LITELLM_PROXY_BASE, - api_transport="rest", -) - - -def embed_text( - texts: List[str] = ["banana muffins? ", "banana bread? banana muffins?"], - task: str = "RETRIEVAL_DOCUMENT", - model_name: str = "text-embedding-004", - dimensionality: Optional[int] = 256, -) -> List[List[float]]: - """Embeds texts with a pre-trained, foundational model.""" - model = TextEmbeddingModel.from_pretrained(model_name) - inputs = [TextEmbeddingInput(text, task) for text in texts] - kwargs = dict(output_dimensionality=dimensionality) if dimensionality else {} - embeddings = model.get_embeddings(inputs, **kwargs) - return [embedding.values for embedding in embeddings] -``` - - - - - -```python -from typing import List, Optional -from vertexai.language_models import TextEmbeddingInput, TextEmbeddingModel -import vertexai -from google.auth.credentials import Credentials -from vertexai.generative_models import GenerativeModel - -LITELLM_PROXY_API_KEY = "sk-1234" -LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai" - -import datetime - - -class CredentialsWrapper(Credentials): - def __init__(self, token=None): - super().__init__() - self.token = token - self.expiry = None # or set to a future date if needed - - def refresh(self, request): - pass - - def apply(self, headers, token=None): - headers["Authorization"] = f"Bearer {self.token}" - - @property - def expired(self): - return False # Always consider the token as non-expired - - @property - def valid(self): - return True # Always consider the credentials as valid - - -credentials = CredentialsWrapper(token=LITELLM_PROXY_API_KEY) - -vertexai.init( - project="adroit-crow-413218", - location="us-central1", - api_endpoint=LITELLM_PROXY_BASE, - credentials=credentials, - api_transport="rest", -) - - -def embed_text( - texts: List[str] = ["banana muffins? ", "banana bread? 
banana muffins?"], - task: str = "RETRIEVAL_DOCUMENT", - model_name: str = "text-embedding-004", - dimensionality: Optional[int] = 256, -) -> List[List[float]]: - """Embeds texts with a pre-trained, foundational model.""" - model = TextEmbeddingModel.from_pretrained(model_name) - inputs = [TextEmbeddingInput(text, task) for text in texts] - kwargs = dict(output_dimensionality=dimensionality) if dimensionality else {} - embeddings = model.get_embeddings(inputs, **kwargs) - return [embedding.values for embedding in embeddings] -``` - - - ```shell curl http://localhost:4000/vertex_ai/publishers/google/models/textembedding-gecko@001:predict \ @@ -411,133 +151,9 @@ curl http://localhost:4000/vertex_ai/publishers/google/models/textembedding-geck -d '{"instances":[{"content": "gm"}]}' ``` - - - ### Imagen API - - - - - -```python -from typing import List, Optional -from vertexai.preview.vision_models import ImageGenerationModel -import vertexai -from google.auth.credentials import Credentials - -LITELLM_PROXY_API_KEY = "sk-1234" -LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai" - -import datetime - -vertexai.init( - project="adroit-crow-413218", - location="us-central1", - api_endpoint=LITELLM_PROXY_BASE, - api_transport="rest", -) - -model = ImageGenerationModel.from_pretrained("imagen-3.0-generate-001") - -images = model.generate_images( - prompt=prompt, - # Optional parameters - number_of_images=1, - language="en", - # You can't use a seed value and watermark at the same time. - # add_watermark=False, - # seed=100, - aspect_ratio="1:1", - safety_filter_level="block_some", - person_generation="allow_adult", -) - -images[0].save(location=output_file, include_generation_parameters=False) - -# Optional. View the generated image in a notebook. -# images[0].show() - -print(f"Created output image using {len(images[0]._image_bytes)} bytes") - -``` - - - - -```python -from typing import List, Optional -from vertexai.preview.vision_models import ImageGenerationModel -import vertexai -from google.auth.credentials import Credentials - -LITELLM_PROXY_API_KEY = "sk-1234" -LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai" - -import datetime - - -class CredentialsWrapper(Credentials): - def __init__(self, token=None): - super().__init__() - self.token = token - self.expiry = None # or set to a future date if needed - - def refresh(self, request): - pass - - def apply(self, headers, token=None): - headers["Authorization"] = f"Bearer {self.token}" - - @property - def expired(self): - return False # Always consider the token as non-expired - - @property - def valid(self): - return True # Always consider the credentials as valid - - -credentials = CredentialsWrapper(token=LITELLM_PROXY_API_KEY) - -vertexai.init( - project="adroit-crow-413218", - location="us-central1", - api_endpoint=LITELLM_PROXY_BASE, - credentials=credentials, - api_transport="rest", -) - -model = ImageGenerationModel.from_pretrained("imagen-3.0-generate-001") - -images = model.generate_images( - prompt=prompt, - # Optional parameters - number_of_images=1, - language="en", - # You can't use a seed value and watermark at the same time. - # add_watermark=False, - # seed=100, - aspect_ratio="1:1", - safety_filter_level="block_some", - person_generation="allow_adult", -) - -images[0].save(location=output_file, include_generation_parameters=False) - -# Optional. View the generated image in a notebook. 
-# images[0].show() - -print(f"Created output image using {len(images[0]._image_bytes)} bytes") - -``` - - - - - ```shell curl http://localhost:4000/vertex_ai/publishers/google/models/imagen-3.0-generate-001:predict \ -H "Content-Type: application/json" \ @@ -545,252 +161,19 @@ curl http://localhost:4000/vertex_ai/publishers/google/models/imagen-3.0-generat -d '{"instances":[{"prompt": "make an otter"}], "parameters": {"sampleCount": 1}}' ``` - - - ### Count Tokens API - - - - - - -```python -from typing import List, Optional -from vertexai.generative_models import GenerativeModel -import vertexai - -LITELLM_PROXY_API_KEY = "sk-1234" -LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai" - -import datetime - -vertexai.init( - project="adroit-crow-413218", - location="us-central1", - api_endpoint=LITELLM_PROXY_BASE, - api_transport="rest", -) - - -model = GenerativeModel("gemini-1.5-flash-001") - -prompt = "Why is the sky blue?" - -# Prompt tokens count -response = model.count_tokens(prompt) -print(f"Prompt Token Count: {response.total_tokens}") -print(f"Prompt Character Count: {response.total_billable_characters}") - -# Send text to Gemini -response = model.generate_content(prompt) - -# Response tokens count -usage_metadata = response.usage_metadata -print(f"Prompt Token Count: {usage_metadata.prompt_token_count}") -print(f"Candidates Token Count: {usage_metadata.candidates_token_count}") -print(f"Total Token Count: {usage_metadata.total_token_count}") -``` - - - - - - -```python -from typing import List, Optional -from vertexai.generative_models import GenerativeModel -import vertexai -from google.auth.credentials import Credentials - -LITELLM_PROXY_API_KEY = "sk-1234" -LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai" - -import datetime - - -class CredentialsWrapper(Credentials): - def __init__(self, token=None): - super().__init__() - self.token = token - self.expiry = None # or set to a future date if needed - - def refresh(self, request): - pass - - def apply(self, headers, token=None): - headers["Authorization"] = f"Bearer {self.token}" - - @property - def expired(self): - return False # Always consider the token as non-expired - - @property - def valid(self): - return True # Always consider the credentials as valid - - -credentials = CredentialsWrapper(token=LITELLM_PROXY_API_KEY) - -vertexai.init( - project="adroit-crow-413218", - location="us-central1", - api_endpoint=LITELLM_PROXY_BASE, - credentials=credentials, - api_transport="rest", -) - - -model = GenerativeModel("gemini-1.5-flash-001") - -prompt = "Why is the sky blue?" 
- -# Prompt tokens count -response = model.count_tokens(prompt) -print(f"Prompt Token Count: {response.total_tokens}") -print(f"Prompt Character Count: {response.total_billable_characters}") - -# Send text to Gemini -response = model.generate_content(prompt) - -# Response tokens count -usage_metadata = response.usage_metadata -print(f"Prompt Token Count: {usage_metadata.prompt_token_count}") -print(f"Candidates Token Count: {usage_metadata.candidates_token_count}") -print(f"Total Token Count: {usage_metadata.total_token_count}") -``` - - - - - - - ```shell curl http://localhost:4000/vertex_ai/publishers/google/models/gemini-1.5-flash-001:countTokens \ -H "Content-Type: application/json" \ -H "x-litellm-api-key: Bearer sk-1234" \ -d '{"contents":[{"role": "user", "parts":[{"text": "hi"}]}]}' ``` - - - - ### Tuning API Create Fine Tuning Job - - - - -```python -from typing import List, Optional -from vertexai.preview.tuning import sft -import vertexai - -LITELLM_PROXY_API_KEY = "sk-1234" -LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai" - - -vertexai.init( - project="adroit-crow-413218", - location="us-central1", - api_endpoint=LITELLM_PROXY_BASE, - api_transport="rest", -) - - -# TODO(developer): Update project -vertexai.init(project=PROJECT_ID, location="us-central1") - -sft_tuning_job = sft.train( - source_model="gemini-1.0-pro-002", - train_dataset="gs://cloud-samples-data/ai-platform/generative_ai/sft_train_data.jsonl", -) - -# Polling for job completion -while not sft_tuning_job.has_ended: - time.sleep(60) - sft_tuning_job.refresh() - -print(sft_tuning_job.tuned_model_name) -print(sft_tuning_job.tuned_model_endpoint_name) -print(sft_tuning_job.experiment) - -``` - - - - - -```python -from typing import List, Optional -from vertexai.preview.tuning import sft -import vertexai -from google.auth.credentials import Credentials - -LITELLM_PROXY_API_KEY = "sk-1234" -LITELLM_PROXY_BASE = "http://0.0.0.0:4000/vertex_ai" - -import datetime - - -class CredentialsWrapper(Credentials): - def __init__(self, token=None): - super().__init__() - self.token = token - self.expiry = None # or set to a future date if needed - - def refresh(self, request): - pass - - def apply(self, headers, token=None): - headers["Authorization"] = f"Bearer {self.token}" - - @property - def expired(self): - return False # Always consider the token as non-expired - - @property - def valid(self): - return True # Always consider the credentials as valid - - -credentials = CredentialsWrapper(token=LITELLM_PROXY_API_KEY) - -vertexai.init( - project="adroit-crow-413218", - location="us-central1", - api_endpoint=LITELLM_PROXY_BASE, - credentials=credentials, - api_transport="rest", -) - - -# TODO(developer): Update project -vertexai.init(project=PROJECT_ID, location="us-central1") - -sft_tuning_job = sft.train( - source_model="gemini-1.0-pro-002", - train_dataset="gs://cloud-samples-data/ai-platform/generative_ai/sft_train_data.jsonl", -) - -# Polling for job completion -while not sft_tuning_job.has_ended: - time.sleep(60) - sft_tuning_job.refresh() - -print(sft_tuning_job.tuned_model_name) -print(sft_tuning_job.tuned_model_endpoint_name) -print(sft_tuning_job.experiment) -``` - - - - ```shell curl http://localhost:4000/vertex_ai/tuningJobs \ @@ -804,118 +187,6 @@ curl http://localhost:4000/vertex_ai/tuningJobs \ }' ``` - - - - - -### Context Caching - -Use Vertex AI Context Caching - -[**Relevant VertexAI Docs**](https://cloud.google.com/vertex-ai/generative-ai/docs/context-cache/context-cache-overview) - - - - - -1. 
Add model to config.yaml -```yaml -model_list: - # used for /chat/completions, /completions, /embeddings endpoints - - model_name: gemini-1.5-pro-001 - litellm_params: - model: vertex_ai/gemini-1.5-pro-001 - vertex_project: "project-id" - vertex_location: "us-central1" - vertex_credentials: "adroit-crow-413218-a956eef1a2a8.json" # Add path to service account.json - -# used for the /cachedContent and vertexAI native endpoints -default_vertex_config: - vertex_project: "adroit-crow-413218" - vertex_location: "us-central1" - vertex_credentials: "adroit-crow-413218-a956eef1a2a8.json" # Add path to service account.json - -``` - -2. Start Proxy - -``` -$ litellm --config /path/to/config.yaml -``` - -3. Make Request! -We make the request in two steps: -- Create a cachedContents object -- Use the cachedContents object in your /chat/completions - -**Create a cachedContents object** - -First, create a cachedContents object by calling the Vertex `cachedContents` endpoint. The LiteLLM proxy forwards the `/cachedContents` request to the VertexAI API. - -```python -import httpx - -# Set Litellm proxy variables -LITELLM_BASE_URL = "http://0.0.0.0:4000" -LITELLM_PROXY_API_KEY = "sk-1234" - -httpx_client = httpx.Client(timeout=30) - -print("Creating cached content") -create_cache = httpx_client.post( - url=f"{LITELLM_BASE_URL}/vertex_ai/cachedContents", - headers={"x-litellm-api-key": f"Bearer {LITELLM_PROXY_API_KEY}"}, - json={ - "model": "gemini-1.5-pro-001", - "contents": [ - { - "role": "user", - "parts": [{ - "text": "This is sample text to demonstrate explicit caching." * 4000 - }] - } - ], - } -) - -print("Response from create_cache:", create_cache) -create_cache_response = create_cache.json() -print("JSON from create_cache:", create_cache_response) -cached_content_name = create_cache_response["name"] -``` - -**Use the cachedContents object in your /chat/completions request to VertexAI** - -```python -import openai - -# Set Litellm proxy variables -LITELLM_BASE_URL = "http://0.0.0.0:4000" -LITELLM_PROXY_API_KEY = "sk-1234" - -client = openai.OpenAI(api_key=LITELLM_PROXY_API_KEY, base_url=LITELLM_BASE_URL) - -response = client.chat.completions.create( - model="gemini-1.5-pro-001", - max_tokens=8192, - messages=[ - { - "role": "user", - "content": "What is the sample text about?", - }, - ], - temperature=0.7, - extra_body={"cached_content": cached_content_name}, # Use the cached content -) - -print("Response from proxy:", response) -``` - - - - - ## Advanced Pre-requisites @@ -930,6 +201,11 @@ Use this, to avoid giving developers the raw Anthropic API key, but still lettin ```bash export DATABASE_URL="" export LITELLM_MASTER_KEY="" + +# vertex ai credentials +export DEFAULT_VERTEXAI_PROJECT="" # "adroit-crow-413218" +export DEFAULT_VERTEXAI_LOCATION="" # "us-central1" +export DEFAULT_GOOGLE_APPLICATION_CREDENTIALS="" # "/Users/Downloads/adroit-crow-413218-a956eef1a2a8.json" ``` ```bash diff --git a/docs/my-website/docs/proxy/config_management.md b/docs/my-website/docs/proxy/config_management.md new file mode 100644 index 000000000..4f7c5775b --- /dev/null +++ b/docs/my-website/docs/proxy/config_management.md @@ -0,0 +1,59 @@ +# File Management + +## `include` external YAML files in a config.yaml + +You can use `include` to include external YAML files in a config.yaml. + +**Quick Start Usage:** + +To include a config file, use `include` with either a single file or a list of files. 
+
+Contents of `parent_config.yaml`:
+```yaml
+include:
+  - model_config.yaml # 👈 Key change, will include the contents of model_config.yaml
+
+litellm_settings:
+  callbacks: ["prometheus"]
+```
+
+
+Contents of `model_config.yaml`:
+```yaml
+model_list:
+  - model_name: gpt-4o
+    litellm_params:
+      model: openai/gpt-4o
+      api_base: https://exampleopenaiendpoint-production.up.railway.app/
+  - model_name: fake-anthropic-endpoint
+    litellm_params:
+      model: anthropic/fake
+      api_base: https://exampleanthropicendpoint-production.up.railway.app/
+
+```
+
+Start proxy server
+
+This will start the proxy server with config `parent_config.yaml`. Since the `include` directive is used, the server will also include the contents of `model_config.yaml`.
+```
+litellm --config parent_config.yaml --detailed_debug
+```
+
+
+## Examples using `include`
+
+Include a single file:
+```yaml
+include:
+  - model_config.yaml
+```
+
+Include multiple files:
+```yaml
+include:
+  - model_config.yaml
+  - another_config.yaml
+```
\ No newline at end of file
diff --git a/docs/my-website/docs/proxy/config_settings.md b/docs/my-website/docs/proxy/config_settings.md
new file mode 100644
index 000000000..c762a0716
--- /dev/null
+++ b/docs/my-website/docs/proxy/config_settings.md
@@ -0,0 +1,507 @@
+# All settings
+
+
+```yaml
+environment_variables: {}
+
+model_list:
+  - model_name: string
+    litellm_params: {}
+    model_info:
+      id: string
+      mode: embedding
+      input_cost_per_token: 0
+      output_cost_per_token: 0
+      max_tokens: 2048
+      base_model: gpt-4-1106-preview
+      additionalProp1: {}
+
+litellm_settings:
+  # Logging/Callback settings
+  success_callback: ["langfuse"] # list of success callbacks
+  failure_callback: ["sentry"] # list of failure callbacks
+  callbacks: ["otel"] # list of callbacks - runs on success and failure
+  service_callbacks: ["datadog", "prometheus"] # logs redis, postgres failures on datadog, prometheus
+  turn_off_message_logging: boolean # prevent the messages and responses from being logged on your callbacks, but request metadata will still be logged.
+  redact_user_api_key_info: boolean # Redact information about the user api key (hashed token, user_id, team id, etc.), from logs. Currently supported for Langfuse, OpenTelemetry, Logfire, ArizeAI logging.
+  langfuse_default_tags: ["cache_hit", "cache_key", "proxy_base_url", "user_api_key_alias", "user_api_key_user_id", "user_api_key_user_email", "user_api_key_team_alias", "semantic-similarity", "proxy_base_url"] # default tags for Langfuse Logging
+
+  # Networking settings
+  request_timeout: 10 # (int) llm request timeout in seconds. Raise Timeout error if call takes longer than 10s. Sets litellm.request_timeout
+  force_ipv4: boolean # If true, litellm will force ipv4 for all LLM requests. Some users have seen httpx ConnectionError when using ipv6 + Anthropic API
+
+  set_verbose: boolean # sets litellm.set_verbose=True to view verbose debug logs. DO NOT LEAVE THIS ON IN PRODUCTION
+  json_logs: boolean # if true, logs will be in json format
+
+  # Fallbacks, reliability
+  default_fallbacks: ["claude-opus"] # set default_fallbacks, in case a specific model group is misconfigured / bad. 
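+  # e.g. with the fallback settings below, a ContentPolicyViolationError on
+  # "gpt-3.5-turbo-small" retries on "claude-opus", while errors on a model group
+  # with no specific fallback configured fall back to default_fallbacks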
+  content_policy_fallbacks: [{"gpt-3.5-turbo-small": ["claude-opus"]}] # fallbacks for ContentPolicyViolationErrors
+  context_window_fallbacks: [{"gpt-3.5-turbo-small": ["gpt-3.5-turbo-large", "claude-opus"]}] # fallbacks for ContextWindowExceededErrors
+
+
+
+  # Caching settings
+  cache: true
+  cache_params: # set cache params for redis
+    type: redis # type of cache to initialize
+
+    # Optional - Redis Settings
+    host: "localhost" # The host address for the Redis cache. Required if type is "redis".
+    port: 6379 # The port number for the Redis cache. Required if type is "redis".
+    password: "your_password" # The password for the Redis cache. Required if type is "redis".
+    namespace: "litellm.caching.caching" # namespace for redis cache
+
+    # Optional - Redis Cluster Settings
+    redis_startup_nodes: [{"host": "127.0.0.1", "port": "7001"}]
+
+    # Optional - Redis Sentinel Settings
+    service_name: "mymaster"
+    sentinel_nodes: [["localhost", 26379]]
+
+    # Optional - Qdrant Semantic Cache Settings
+    qdrant_semantic_cache_embedding_model: openai-embedding # the model should be defined on the model_list
+    qdrant_collection_name: test_collection
+    qdrant_quantization_config: binary
+    similarity_threshold: 0.8 # similarity threshold for semantic cache
+
+    # Optional - S3 Cache Settings
+    s3_bucket_name: cache-bucket-litellm # AWS Bucket Name for S3
+    s3_region_name: us-west-2 # AWS Region Name for S3
+    s3_aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID # use os.environ/ to pass environment variables. This is AWS Access Key ID for S3
+    s3_aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY # AWS Secret Access Key for S3
+    s3_endpoint_url: https://s3.amazonaws.com # [OPTIONAL] S3 endpoint URL, if you want to use Backblaze/cloudflare s3 bucket
+
+    # Common Cache settings
+    # Optional - Supported call types for caching
+    supported_call_types: ["acompletion", "atext_completion", "aembedding", "atranscription"]
+    # /chat/completions, /completions, /embeddings, /audio/transcriptions
+    mode: default_off # if default_off, you need to opt in to caching on a per call basis
+    ttl: 600 # ttl for caching
+
+
+callback_settings:
+  otel:
+    message_logging: boolean # OTEL logging callback specific settings
+
+general_settings:
+  completion_model: string
+  disable_spend_logs: boolean # turn off writing each transaction to the db
+  disable_master_key_return: boolean # turn off returning master key on UI (checked on '/user/info' endpoint)
+  disable_retry_on_max_parallel_request_limit_error: boolean # turn off retries when max parallel request limit is reached
+  disable_reset_budget: boolean # turn off reset budget scheduled task
+  disable_adding_master_key_hash_to_db: boolean # turn off storing master key hash in db, for spend tracking
+  enable_jwt_auth: boolean # allow proxy admin to auth in via jwt tokens with 'litellm_proxy_admin' in claims
+  enforce_user_param: boolean # requires all openai endpoint requests to have a 'user' param
+  allowed_routes: ["route1", "route2"] # list of allowed proxy API routes - a user can access. 
(currently JWT-Auth only) + key_management_system: google_kms # either google_kms or azure_kms + master_key: string + + # Database Settings + database_url: string + database_connection_pool_limit: 0 # default 100 + database_connection_timeout: 0 # default 60s + allow_requests_on_db_unavailable: boolean # if true, will allow requests that can not connect to the DB to verify Virtual Key to still work + + custom_auth: string + max_parallel_requests: 0 # the max parallel requests allowed per deployment + global_max_parallel_requests: 0 # the max parallel requests allowed on the proxy all up + infer_model_from_keys: true + background_health_checks: true + health_check_interval: 300 + alerting: ["slack", "email"] + alerting_threshold: 0 + use_client_credentials_pass_through_routes: boolean # use client credentials for all pass through routes like "/vertex-ai", /bedrock/. When this is True Virtual Key auth will not be applied on these endpoints +``` + +### litellm_settings - Reference + +| Name | Type | Description | +|------|------|-------------| +| success_callback | array of strings | List of success callbacks. [Doc Proxy logging callbacks](logging), [Doc Metrics](prometheus) | +| failure_callback | array of strings | List of failure callbacks [Doc Proxy logging callbacks](logging), [Doc Metrics](prometheus) | +| callbacks | array of strings | List of callbacks - runs on success and failure [Doc Proxy logging callbacks](logging), [Doc Metrics](prometheus) | +| service_callbacks | array of strings | System health monitoring - Logs redis, postgres failures on specified services (e.g. datadog, prometheus) [Doc Metrics](prometheus) | +| turn_off_message_logging | boolean | If true, prevents messages and responses from being logged to callbacks, but request metadata will still be logged [Proxy Logging](logging) | +| modify_params | boolean | If true, allows modifying the parameters of the request before it is sent to the LLM provider | +| enable_preview_features | boolean | If true, enables preview features - e.g. Azure O1 Models with streaming support.| +| redact_user_api_key_info | boolean | If true, redacts information about the user api key from logs [Proxy Logging](logging#redacting-userapikeyinfo) | +| langfuse_default_tags | array of strings | Default tags for Langfuse Logging. Use this if you want to control which LiteLLM-specific fields are logged as tags by the LiteLLM proxy. By default LiteLLM Proxy logs no LiteLLM-specific fields as tags. [Further docs](./logging#litellm-specific-tags-on-langfuse---cache_hit-cache_key) | +| set_verbose | boolean | If true, sets litellm.set_verbose=True to view verbose debug logs. DO NOT LEAVE THIS ON IN PRODUCTION | +| json_logs | boolean | If true, logs will be in json format. If you need to store the logs as JSON, just set the `litellm.json_logs = True`. We currently just log the raw POST request from litellm as a JSON [Further docs](./debugging) | +| default_fallbacks | array of strings | List of fallback models to use if a specific model group is misconfigured / bad. [Further docs](./reliability#default-fallbacks) | +| request_timeout | integer | The timeout for requests in seconds. If not set, the default value is `6000 seconds`. [For reference OpenAI Python SDK defaults to `600 seconds`.](https://github.com/openai/openai-python/blob/main/src/openai/_constants.py) | +| force_ipv4 | boolean | If true, litellm will force ipv4 for all LLM requests. 
Some users have seen httpx ConnectionError when using ipv6 + Anthropic API |
+| content_policy_fallbacks | array of objects | Fallbacks to use when a ContentPolicyViolationError is encountered. [Further docs](./reliability#content-policy-fallbacks) |
+| context_window_fallbacks | array of objects | Fallbacks to use when a ContextWindowExceededError is encountered. [Further docs](./reliability#context-window-fallbacks) |
+| cache | boolean | If true, enables caching. [Further docs](./caching) |
+| cache_params | object | Parameters for the cache. [Further docs](./caching) |
+| cache_params.type | string | The type of cache to initialize. Can be one of ["local", "redis", "redis-semantic", "s3", "disk", "qdrant-semantic"]. Defaults to "redis". [Further docs](./caching) |
+| cache_params.host | string | The host address for the Redis cache. Required if type is "redis". |
+| cache_params.port | integer | The port number for the Redis cache. Required if type is "redis". |
+| cache_params.password | string | The password for the Redis cache. Required if type is "redis". |
+| cache_params.namespace | string | The namespace for the Redis cache. |
+| cache_params.redis_startup_nodes | array of objects | Redis Cluster Settings. [Further docs](./caching) |
+| cache_params.service_name | string | Redis Sentinel Settings. [Further docs](./caching) |
+| cache_params.sentinel_nodes | array of arrays | Redis Sentinel Settings. [Further docs](./caching) |
+| cache_params.ttl | integer | The time (in seconds) to store entries in cache. |
+| cache_params.qdrant_semantic_cache_embedding_model | string | The embedding model to use for qdrant semantic cache. |
+| cache_params.qdrant_collection_name | string | The name of the collection to use for qdrant semantic cache. |
+| cache_params.qdrant_quantization_config | string | The quantization configuration for the qdrant semantic cache. |
+| cache_params.similarity_threshold | float | The similarity threshold for the semantic cache. |
+| cache_params.s3_bucket_name | string | The name of the S3 bucket to use for the S3 cache. |
+| cache_params.s3_region_name | string | The region name for the S3 bucket. |
+| cache_params.s3_aws_access_key_id | string | The AWS access key ID for the S3 bucket. |
+| cache_params.s3_aws_secret_access_key | string | The AWS secret access key for the S3 bucket. |
+| cache_params.s3_endpoint_url | string | Optional - The endpoint URL for the S3 bucket. |
+| cache_params.supported_call_types | array of strings | The types of calls to cache. [Further docs](./caching) |
+| cache_params.mode | string | The mode of the cache. [Further docs](./caching) |
+| disable_end_user_cost_tracking | boolean | If true, turns off end user cost tracking on prometheus metrics + litellm spend logs table on proxy. |
+| key_generation_settings | object | Restricts who can generate keys. [Further docs](./virtual_keys.md#restricting-key-generation) |
+
+### general_settings - Reference
+
+| Name | Type | Description |
+|------|------|-------------|
+| completion_model | string | The default model to use for completions when `model` is not specified in the request |
+| disable_spend_logs | boolean | If true, turns off writing each transaction to the database |
+| disable_master_key_return | boolean | If true, turns off returning master key on UI. 
(checked on '/user/info' endpoint) | +| disable_retry_on_max_parallel_request_limit_error | boolean | If true, turns off retries when max parallel request limit is reached | +| disable_reset_budget | boolean | If true, turns off reset budget scheduled task | +| disable_adding_master_key_hash_to_db | boolean | If true, turns off storing master key hash in db | +| enable_jwt_auth | boolean | allow proxy admin to auth in via jwt tokens with 'litellm_proxy_admin' in claims. [Doc on JWT Tokens](token_auth) | +| enforce_user_param | boolean | If true, requires all OpenAI endpoint requests to have a 'user' param. [Doc on call hooks](call_hooks)| +| allowed_routes | array of strings | List of allowed proxy API routes a user can access [Doc on controlling allowed routes](enterprise#control-available-public-private-routes)| +| key_management_system | string | Specifies the key management system. [Doc Secret Managers](../secret) | +| master_key | string | The master key for the proxy [Set up Virtual Keys](virtual_keys) | +| database_url | string | The URL for the database connection [Set up Virtual Keys](virtual_keys) | +| database_connection_pool_limit | integer | The limit for database connection pool [Setting DB Connection Pool limit](#configure-db-pool-limits--connection-timeouts) | +| database_connection_timeout | integer | The timeout for database connections in seconds [Setting DB Connection Pool limit, timeout](#configure-db-pool-limits--connection-timeouts) | +| allow_requests_on_db_unavailable | boolean | If true, allows requests to succeed even if DB is unreachable. **Only use this if running LiteLLM in your VPC** This will allow requests to work even when LiteLLM cannot connect to the DB to verify a Virtual Key | +| custom_auth | string | Write your own custom authentication logic [Doc Custom Auth](virtual_keys#custom-auth) | +| max_parallel_requests | integer | The max parallel requests allowed per deployment | +| global_max_parallel_requests | integer | The max parallel requests allowed on the proxy overall | +| infer_model_from_keys | boolean | If true, infers the model from the provided keys | +| background_health_checks | boolean | If true, enables background health checks. [Doc on health checks](health) | +| health_check_interval | integer | The interval for health checks in seconds [Doc on health checks](health) | +| alerting | array of strings | List of alerting methods [Doc on Slack Alerting](alerting) | +| alerting_threshold | integer | The threshold for triggering alerts [Doc on Slack Alerting](alerting) | +| use_client_credentials_pass_through_routes | boolean | If true, uses client credentials for all pass-through routes. [Doc on pass through routes](pass_through) | +| health_check_details | boolean | If false, hides health check details (e.g. remaining rate limit). 
[Doc on health checks](health) |
+| public_routes | List[str] | (Enterprise Feature) Control list of public routes |
+| alert_types | List[str] | Control list of alert types to send to slack [Doc on alert types](./alerting.md) |
+| enforced_params | List[str] | (Enterprise Feature) List of params that must be included in all requests to the proxy |
+| enable_oauth2_auth | boolean | (Enterprise Feature) If true, enables oauth2.0 authentication |
+| use_x_forwarded_for | str | If true, uses the X-Forwarded-For header to get the client IP address |
+| service_account_settings | List[Dict[str, Any]] | Set `service_account_settings` if you want to create settings that only apply to service account keys [Doc on service accounts](./service_accounts.md) |
+| image_generation_model | str | The default model to use for image generation - ignores model set in request |
+| store_model_in_db | boolean | If true, allows `/model/new` endpoint to store model information in db. Endpoint disabled by default. [Doc on `/model/new` endpoint](./model_management.md#create-a-new-model) |
+| max_request_size_mb | int | The maximum size for requests in MB. Requests above this size will be rejected. |
+| max_response_size_mb | int | The maximum size for responses in MB. LLM Responses above this size will not be sent. |
+| proxy_budget_rescheduler_min_time | int | The minimum time (in seconds) to wait before checking db for budget resets. **Default is 597 seconds** |
+| proxy_budget_rescheduler_max_time | int | The maximum time (in seconds) to wait before checking db for budget resets. **Default is 605 seconds** |
+| proxy_batch_write_at | int | Time (in seconds) to wait before batch writing spend logs to the db. **Default is 10 seconds** |
+| alerting_args | dict | Args for Slack Alerting [Doc on Slack Alerting](./alerting.md) |
+| custom_key_generate | str | Custom function for key generation [Doc on custom key generation](./virtual_keys.md#custom--key-generate) |
+| allowed_ips | List[str] | List of IPs allowed to access the proxy. If not set, all IPs are allowed. |
+| embedding_model | str | The default model to use for embeddings - ignores model set in request |
+| default_team_disabled | boolean | If true, users cannot create 'personal' keys (keys with no team_id). |
+| alert_to_webhook_url | Dict[str] | [Specify a webhook url for each alert type.](./alerting.md#set-specific-slack-channels-per-alert-type) |
+| key_management_settings | List[Dict[str, Any]] | Settings for key management system (e.g. AWS KMS, Azure Key Vault) [Doc on key management](../secret.md) |
+| allow_user_auth | boolean | (Deprecated) old approach for user authentication. |
+| user_api_key_cache_ttl | int | The time (in seconds) to cache user api keys in memory. |
+| disable_prisma_schema_update | boolean | If true, turns off automatic schema updates to DB |
+| litellm_key_header_name | str | If set, allows passing LiteLLM keys as a custom header. [Doc on custom headers](./virtual_keys.md#custom-headers) |
+| moderation_model | str | The default model to use for moderation. |
+| custom_sso | str | Path to a python file that implements custom SSO logic. [Doc on custom SSO](./custom_sso.md) |
+| allow_client_side_credentials | boolean | If true, allows passing client side credentials to the proxy. (Useful when testing finetuning models) [Doc on client side credentials](./virtual_keys.md#client-side-credentials) |
+| admin_only_routes | List[str] | (Enterprise Feature) List of routes that are only accessible to admin users. 
[Doc on admin only routes](./enterprise#control-available-public-private-routes) |
+| use_azure_key_vault | boolean | If true, load keys from azure key vault |
+| use_google_kms | boolean | If true, load keys from google kms |
+| spend_report_frequency | str | Specify how often you want a Spend Report to be sent (e.g. "1d", "2d", "30d") [More on this](./alerting.md#spend-report-frequency) |
+| ui_access_mode | Literal["admin_only"] | If set, restricts access to the UI to admin users only. [Docs](./ui.md#restrict-ui-access) |
+| litellm_jwtauth | Dict[str, Any] | Settings for JWT authentication. [Docs](./token_auth.md) |
+| litellm_license | str | The license key for the proxy. [Docs](../enterprise.md#how-does-deployment-with-enterprise-license-work) |
+| oauth2_config_mappings | Dict[str, str] | Define the OAuth2 config mappings |
+| pass_through_endpoints | List[Dict[str, Any]] | Define the pass through endpoints. [Docs](./pass_through) |
+| enable_oauth2_proxy_auth | boolean | (Enterprise Feature) If true, enables oauth2.0 authentication |
+| forward_openai_org_id | boolean | If true, forwards the OpenAI Organization ID to the backend LLM call (if it's OpenAI). |
+| forward_client_headers_to_llm_api | boolean | If true, forwards the client headers (any `x-` headers) to the backend LLM call |
+
+### router_settings - Reference
+
+:::info
+
+Most values can also be set via `litellm_settings`. If you see overlapping values, settings on `router_settings` will override those on `litellm_settings`.
+:::
+
+```yaml
+router_settings:
+  routing_strategy: usage-based-routing-v2 # Literal["simple-shuffle", "least-busy", "usage-based-routing", "usage-based-routing-v2", "latency-based-routing"], default="simple-shuffle"
+  redis_host: # string
+  redis_password: # string
+  redis_port: # string
+  enable_pre_call_check: true # bool - Before call is made check if a call is within model context window
+  allowed_fails: 3 # cooldown model if it fails more than 3 calls in a minute.
+  cooldown_time: 30 # (in seconds) how long to cooldown model if fails/min > allowed_fails
+  disable_cooldowns: True # bool - Disable cooldowns for all models
+  enable_tag_filtering: True # bool - Use tag based routing for requests
+  retry_policy: { # Dict[str, int]: retry policy for different types of exceptions
+    "AuthenticationErrorRetries": 3,
+    "TimeoutErrorRetries": 3,
+    "RateLimitErrorRetries": 3,
+    "ContentPolicyViolationErrorRetries": 4,
+    "InternalServerErrorRetries": 4
+  }
+  allowed_fails_policy: {
+    "BadRequestErrorAllowedFails": 1000, # Allow 1000 BadRequestErrors before cooling down a deployment
+    "AuthenticationErrorAllowedFails": 10, # int
+    "TimeoutErrorAllowedFails": 12, # int
+    "RateLimitErrorAllowedFails": 10000, # int
+    "ContentPolicyViolationErrorAllowedFails": 15, # int
+    "InternalServerErrorAllowedFails": 20, # int
+  }
+  content_policy_fallbacks: [{"claude-2": ["my-fallback-model"]}] # List[Dict[str, List[str]]]: Fallback model for content policy violations
+  fallbacks: [{"claude-2": ["my-fallback-model"]}] # List[Dict[str, List[str]]]: Fallback model for all errors
+```
+
+| Name | Type | Description |
+|------|------|-------------|
+| routing_strategy | string | The strategy used for routing requests. Options: "simple-shuffle", "least-busy", "usage-based-routing", "usage-based-routing-v2", "latency-based-routing". Default is "simple-shuffle". [More information here](../routing) |
+| redis_host | string | The host address for the Redis server. 
**Only set this if you have multiple instances of LiteLLM Proxy and want current tpm/rpm tracking to be shared across them** | +| redis_password | string | The password for the Redis server. **Only set this if you have multiple instances of LiteLLM Proxy and want current tpm/rpm tracking to be shared across them** | +| redis_port | string | The port number for the Redis server. **Only set this if you have multiple instances of LiteLLM Proxy and want current tpm/rpm tracking to be shared across them**| +| enable_pre_call_check | boolean | If true, checks if a call is within the model's context window before making the call. [More information here](reliability) | +| content_policy_fallbacks | array of objects | Specifies fallback models for content policy violations. [More information here](reliability) | +| fallbacks | array of objects | Specifies fallback models for all types of errors. [More information here](reliability) | +| enable_tag_filtering | boolean | If true, uses tag based routing for requests [Tag Based Routing](tag_routing) | +| cooldown_time | integer | The duration (in seconds) to cooldown a model if it exceeds the allowed failures. | +| disable_cooldowns | boolean | If true, disables cooldowns for all models. [More information here](reliability) | +| retry_policy | object | Specifies the number of retries for different types of exceptions. [More information here](reliability) | +| allowed_fails | integer | The number of failures allowed before cooling down a model. [More information here](reliability) | +| allowed_fails_policy | object | Specifies the number of allowed failures for different error types before cooling down a deployment. [More information here](reliability) | +| default_max_parallel_requests | Optional[int] | The default maximum number of parallel requests for a deployment. | +| default_priority | (Optional[int]) | The default priority for a request. Only for '.scheduler_acompletion()'. Default is None. | +| polling_interval | (Optional[float]) | frequency of polling queue. Only for '.scheduler_acompletion()'. Default is 3ms. | +| max_fallbacks | Optional[int] | The maximum number of fallbacks to try before exiting the call. Defaults to 5. | +| default_litellm_params | Optional[dict] | The default litellm parameters to add to all requests (e.g. `temperature`, `max_tokens`). | +| timeout | Optional[float] | The default timeout for a request. | +| debug_level | Literal["DEBUG", "INFO"] | The debug level for the logging library in the router. Defaults to "INFO". | +| client_ttl | int | Time-to-live for cached clients in seconds. Defaults to 3600. | +| cache_kwargs | dict | Additional keyword arguments for the cache initialization. | +| routing_strategy_args | dict | Additional keyword arguments for the routing strategy - e.g. lowest latency routing default ttl | +| model_group_alias | dict | Model group alias mapping. E.g. `{"claude-3-haiku": "claude-3-haiku-20240229"}` | +| num_retries | int | Number of retries for a request. Defaults to 3. | +| default_fallbacks | Optional[List[str]] | Fallbacks to try if no model group-specific fallbacks are defined. | +| caching_groups | Optional[List[tuple]] | List of model groups for caching across model groups. Defaults to None. - e.g. caching_groups=[("openai-gpt-3.5-turbo", "azure-gpt-3.5-turbo")]| +| alerting_config | AlertingConfig | [SDK-only arg] Slack alerting configuration. Defaults to None. [Further Docs](../routing.md#alerting-) | +| assistants_config | AssistantsConfig | Set on proxy via `assistant_settings`. 
[Further docs](../assistants.md) | +| set_verbose | boolean | [DEPRECATED PARAM - see debug docs](./debugging.md) If true, sets the logging level to verbose. | +| retry_after | int | Time to wait before retrying a request in seconds. Defaults to 0. If `x-retry-after` is received from LLM API, this value is overridden. | +| provider_budget_config | ProviderBudgetConfig | Provider budget configuration. Use this to set llm_provider budget limits. example $100/day to OpenAI, $100/day to Azure, etc. Defaults to None. [Further Docs](./provider_budget_routing.md) | +| enable_pre_call_checks | boolean | If true, checks if a call is within the model's context window before making the call. [More information here](reliability) | +| model_group_retry_policy | Dict[str, RetryPolicy] | [SDK-only arg] Set retry policy for model groups. | +| context_window_fallbacks | List[Dict[str, List[str]]] | Fallback models for context window violations. | +| redis_url | str | URL for Redis server. **Known performance issue with Redis URL.** | +| cache_responses | boolean | Flag to enable caching LLM Responses, if cache set under `router_settings`. If true, caches responses. Defaults to False. | +| router_general_settings | RouterGeneralSettings | [SDK-Only] Router general settings - contains optimizations like 'async_only_mode'. [Docs](../routing.md#router-general-settings) | + +### environment variables - Reference + +| Name | Description | +|------|-------------| +| ACTIONS_ID_TOKEN_REQUEST_TOKEN | Token for requesting ID in GitHub Actions +| ACTIONS_ID_TOKEN_REQUEST_URL | URL for requesting ID token in GitHub Actions +| AISPEND_ACCOUNT_ID | Account ID for AI Spend +| AISPEND_API_KEY | API Key for AI Spend +| ALLOWED_EMAIL_DOMAINS | List of email domains allowed for access +| ARIZE_API_KEY | API key for Arize platform integration +| ARIZE_SPACE_KEY | Space key for Arize platform +| ARGILLA_BATCH_SIZE | Batch size for Argilla logging +| ARGILLA_API_KEY | API key for Argilla platform +| ARGILLA_SAMPLING_RATE | Sampling rate for Argilla logging +| ARGILLA_DATASET_NAME | Dataset name for Argilla logging +| ARGILLA_BASE_URL | Base URL for Argilla service +| ATHINA_API_KEY | API key for Athina service +| AUTH_STRATEGY | Strategy used for authentication (e.g., OAuth, API key) +| AWS_ACCESS_KEY_ID | Access Key ID for AWS services +| AWS_PROFILE_NAME | AWS CLI profile name to be used +| AWS_REGION_NAME | Default AWS region for service interactions +| AWS_ROLE_NAME | Role name for AWS IAM usage +| AWS_SECRET_ACCESS_KEY | Secret Access Key for AWS services +| AWS_SESSION_NAME | Name for AWS session +| AWS_WEB_IDENTITY_TOKEN | Web identity token for AWS +| AZURE_API_VERSION | Version of the Azure API being used +| AZURE_AUTHORITY_HOST | Azure authority host URL +| AZURE_CLIENT_ID | Client ID for Azure services +| AZURE_CLIENT_SECRET | Client secret for Azure services +| AZURE_FEDERATED_TOKEN_FILE | File path to Azure federated token +| AZURE_KEY_VAULT_URI | URI for Azure Key Vault +| AZURE_TENANT_ID | Tenant ID for Azure Active Directory +| BERRISPEND_ACCOUNT_ID | Account ID for BerriSpend service +| BRAINTRUST_API_KEY | API key for Braintrust integration +| CIRCLE_OIDC_TOKEN | OpenID Connect token for CircleCI +| CIRCLE_OIDC_TOKEN_V2 | Version 2 of the OpenID Connect token for CircleCI +| CONFIG_FILE_PATH | File path for configuration file +| CUSTOM_TIKTOKEN_CACHE_DIR | Custom directory for Tiktoken cache +| DATABASE_HOST | Hostname for the database server +| DATABASE_NAME | Name of the database +| DATABASE_PASSWORD | 
Password for the database user +| DATABASE_PORT | Port number for database connection +| DATABASE_SCHEMA | Schema name used in the database +| DATABASE_URL | Connection URL for the database +| DATABASE_USER | Username for database connection +| DATABASE_USERNAME | Alias for database user +| DATABRICKS_API_BASE | Base URL for Databricks API +| DD_BASE_URL | Base URL for Datadog integration +| DATADOG_BASE_URL | (Alternative to DD_BASE_URL) Base URL for Datadog integration +| _DATADOG_BASE_URL | (Alternative to DD_BASE_URL) Base URL for Datadog integration +| DD_API_KEY | API key for Datadog integration +| DD_SITE | Site URL for Datadog (e.g., datadoghq.com) +| DD_SOURCE | Source identifier for Datadog logs +| DD_ENV | Environment identifier for Datadog logs. Only supported for `datadog_llm_observability` callback +| DD_SERVICE | Service identifier for Datadog logs. Defaults to "litellm-server" +| DD_VERSION | Version identifier for Datadog logs. Defaults to "unknown" +| DEBUG_OTEL | Enable debug mode for OpenTelemetry +| DIRECT_URL | Direct URL for service endpoint +| DISABLE_ADMIN_UI | Toggle to disable the admin UI +| DISABLE_SCHEMA_UPDATE | Toggle to disable schema updates +| DOCS_DESCRIPTION | Description text for documentation pages +| DOCS_FILTERED | Flag indicating filtered documentation +| DOCS_TITLE | Title of the documentation pages +| DOCS_URL | The path to the Swagger API documentation. **By default this is "/"** +| EMAIL_SUPPORT_CONTACT | Support contact email address +| GCS_BUCKET_NAME | Name of the Google Cloud Storage bucket +| GCS_PATH_SERVICE_ACCOUNT | Path to the Google Cloud service account JSON file +| GCS_FLUSH_INTERVAL | Flush interval for GCS logging (in seconds). Specify how often you want a log to be sent to GCS. **Default is 20 seconds** +| GCS_BATCH_SIZE | Batch size for GCS logging. Specify after how many logs you want to flush to GCS. If `BATCH_SIZE` is set to 10, logs are flushed every 10 logs. 
**Default is 2048** +| GENERIC_AUTHORIZATION_ENDPOINT | Authorization endpoint for generic OAuth providers +| GENERIC_CLIENT_ID | Client ID for generic OAuth providers +| GENERIC_CLIENT_SECRET | Client secret for generic OAuth providers +| GENERIC_CLIENT_STATE | State parameter for generic client authentication +| GENERIC_INCLUDE_CLIENT_ID | Include client ID in requests for OAuth +| GENERIC_SCOPE | Scope settings for generic OAuth providers +| GENERIC_TOKEN_ENDPOINT | Token endpoint for generic OAuth providers +| GENERIC_USER_DISPLAY_NAME_ATTRIBUTE | Attribute for user's display name in generic auth +| GENERIC_USER_EMAIL_ATTRIBUTE | Attribute for user's email in generic auth +| GENERIC_USER_FIRST_NAME_ATTRIBUTE | Attribute for user's first name in generic auth +| GENERIC_USER_ID_ATTRIBUTE | Attribute for user ID in generic auth +| GENERIC_USER_LAST_NAME_ATTRIBUTE | Attribute for user's last name in generic auth +| GENERIC_USER_PROVIDER_ATTRIBUTE | Attribute specifying the user's provider +| GENERIC_USER_ROLE_ATTRIBUTE | Attribute specifying the user's role +| GENERIC_USERINFO_ENDPOINT | Endpoint to fetch user information in generic OAuth +| GALILEO_BASE_URL | Base URL for Galileo platform +| GALILEO_PASSWORD | Password for Galileo authentication +| GALILEO_PROJECT_ID | Project ID for Galileo usage +| GALILEO_USERNAME | Username for Galileo authentication +| GREENSCALE_API_KEY | API key for Greenscale service +| GREENSCALE_ENDPOINT | Endpoint URL for Greenscale service +| GOOGLE_APPLICATION_CREDENTIALS | Path to Google Cloud credentials JSON file +| GOOGLE_CLIENT_ID | Client ID for Google OAuth +| GOOGLE_CLIENT_SECRET | Client secret for Google OAuth +| GOOGLE_KMS_RESOURCE_NAME | Name of the resource in Google KMS +| HF_API_BASE | Base URL for Hugging Face API +| HELICONE_API_KEY | API key for Helicone service +| HUGGINGFACE_API_BASE | Base URL for Hugging Face API +| IAM_TOKEN_DB_AUTH | IAM token for database authentication +| JSON_LOGS | Enable JSON formatted logging +| JWT_AUDIENCE | Expected audience for JWT tokens +| JWT_PUBLIC_KEY_URL | URL to fetch public key for JWT verification +| LAGO_API_BASE | Base URL for Lago API +| LAGO_API_CHARGE_BY | Parameter to determine charge basis in Lago +| LAGO_API_EVENT_CODE | Event code for Lago API events +| LAGO_API_KEY | API key for accessing Lago services +| LANGFUSE_DEBUG | Toggle debug mode for Langfuse +| LANGFUSE_FLUSH_INTERVAL | Interval for flushing Langfuse logs +| LANGFUSE_HOST | Host URL for Langfuse service +| LANGFUSE_PUBLIC_KEY | Public key for Langfuse authentication +| LANGFUSE_RELEASE | Release version of Langfuse integration +| LANGFUSE_SECRET_KEY | Secret key for Langfuse authentication +| LANGSMITH_API_KEY | API key for Langsmith platform +| LANGSMITH_BASE_URL | Base URL for Langsmith service +| LANGSMITH_BATCH_SIZE | Batch size for operations in Langsmith +| LANGSMITH_DEFAULT_RUN_NAME | Default name for Langsmith run +| LANGSMITH_PROJECT | Project name for Langsmith integration +| LANGSMITH_SAMPLING_RATE | Sampling rate for Langsmith logging +| LANGTRACE_API_KEY | API key for Langtrace service +| LITERAL_API_KEY | API key for Literal integration +| LITERAL_API_URL | API URL for Literal service +| LITERAL_BATCH_SIZE | Batch size for Literal operations +| LITELLM_DONT_SHOW_FEEDBACK_BOX | Flag to hide feedback box in LiteLLM UI +| LITELLM_DROP_PARAMS | Parameters to drop in LiteLLM requests +| LITELLM_EMAIL | Email associated with LiteLLM account +| LITELLM_GLOBAL_MAX_PARALLEL_REQUEST_RETRIES | Maximum retries for parallel 
requests in LiteLLM +| LITELLM_GLOBAL_MAX_PARALLEL_REQUEST_RETRY_TIMEOUT | Timeout for retries of parallel requests in LiteLLM +| LITELLM_HOSTED_UI | URL of the hosted UI for LiteLLM +| LITELLM_LICENSE | License key for LiteLLM usage +| LITELLM_LOCAL_MODEL_COST_MAP | Local configuration for model cost mapping in LiteLLM +| LITELLM_LOG | Enable detailed logging for LiteLLM +| LITELLM_MODE | Operating mode for LiteLLM (e.g., production, development) +| LITELLM_SALT_KEY | Salt key for encryption in LiteLLM +| LITELLM_SECRET_AWS_KMS_LITELLM_LICENSE | AWS KMS encrypted license for LiteLLM +| LITELLM_TOKEN | Access token for LiteLLM integration +| LOGFIRE_TOKEN | Token for Logfire logging service +| MICROSOFT_CLIENT_ID | Client ID for Microsoft services +| MICROSOFT_CLIENT_SECRET | Client secret for Microsoft services +| MICROSOFT_TENANT | Tenant ID for Microsoft Azure +| NO_DOCS | Flag to disable documentation generation +| NO_PROXY | List of addresses to bypass proxy +| OAUTH_TOKEN_INFO_ENDPOINT | Endpoint for OAuth token info retrieval +| OPENAI_API_BASE | Base URL for OpenAI API +| OPENAI_API_KEY | API key for OpenAI services +| OPENAI_ORGANIZATION | Organization identifier for OpenAI +| OPENID_BASE_URL | Base URL for OpenID Connect services +| OPENID_CLIENT_ID | Client ID for OpenID Connect authentication +| OPENID_CLIENT_SECRET | Client secret for OpenID Connect authentication +| OPENMETER_API_ENDPOINT | API endpoint for OpenMeter integration +| OPENMETER_API_KEY | API key for OpenMeter services +| OPENMETER_EVENT_TYPE | Type of events sent to OpenMeter +| OTEL_ENDPOINT | OpenTelemetry endpoint for traces +| OTEL_ENVIRONMENT_NAME | Environment name for OpenTelemetry +| OTEL_EXPORTER | Exporter type for OpenTelemetry +| OTEL_HEADERS | Headers for OpenTelemetry requests +| OTEL_SERVICE_NAME | Service name identifier for OpenTelemetry +| OTEL_TRACER_NAME | Tracer name for OpenTelemetry tracing +| PREDIBASE_API_BASE | Base URL for Predibase API +| PRESIDIO_ANALYZER_API_BASE | Base URL for Presidio Analyzer service +| PRESIDIO_ANONYMIZER_API_BASE | Base URL for Presidio Anonymizer service +| PROMETHEUS_URL | URL for Prometheus service +| PROMPTLAYER_API_KEY | API key for PromptLayer integration +| PROXY_ADMIN_ID | Admin identifier for proxy server +| PROXY_BASE_URL | Base URL for proxy service +| PROXY_LOGOUT_URL | URL for logging out of the proxy service +| PROXY_MASTER_KEY | Master key for proxy authentication +| QDRANT_API_BASE | Base URL for Qdrant API +| QDRANT_API_KEY | API key for Qdrant service +| QDRANT_URL | Connection URL for Qdrant database +| REDIS_HOST | Hostname for Redis server +| REDIS_PASSWORD | Password for Redis service +| REDIS_PORT | Port number for Redis server +| REDOC_URL | The path to the Redoc Fast API documentation. 
**By default this is "/redoc"** +| SERVER_ROOT_PATH | Root path for the server application +| SET_VERBOSE | Flag to enable verbose logging +| SLACK_DAILY_REPORT_FREQUENCY | Frequency of daily Slack reports (e.g., daily, weekly) +| SLACK_WEBHOOK_URL | Webhook URL for Slack integration +| SMTP_HOST | Hostname for the SMTP server +| SMTP_PASSWORD | Password for SMTP authentication +| SMTP_PORT | Port number for SMTP server +| SMTP_SENDER_EMAIL | Email address used as the sender in SMTP transactions +| SMTP_SENDER_LOGO | Logo used in emails sent via SMTP +| SMTP_TLS | Flag to enable or disable TLS for SMTP connections +| SMTP_USERNAME | Username for SMTP authentication +| SPEND_LOGS_URL | URL for retrieving spend logs +| SSL_CERTIFICATE | Path to the SSL certificate file +| SSL_VERIFY | Flag to enable or disable SSL certificate verification +| SUPABASE_KEY | API key for Supabase service +| SUPABASE_URL | Base URL for Supabase instance +| TEST_EMAIL_ADDRESS | Email address used for testing purposes +| UI_LOGO_PATH | Path to the logo image used in the UI +| UI_PASSWORD | Password for accessing the UI +| UI_USERNAME | Username for accessing the UI +| UPSTREAM_LANGFUSE_DEBUG | Flag to enable debugging for upstream Langfuse +| UPSTREAM_LANGFUSE_HOST | Host URL for upstream Langfuse service +| UPSTREAM_LANGFUSE_PUBLIC_KEY | Public key for upstream Langfuse authentication +| UPSTREAM_LANGFUSE_RELEASE | Release version identifier for upstream Langfuse +| UPSTREAM_LANGFUSE_SECRET_KEY | Secret key for upstream Langfuse authentication +| USE_AWS_KMS | Flag to enable AWS Key Management Service for encryption +| WEBHOOK_URL | URL for receiving webhooks from external services + diff --git a/docs/my-website/docs/proxy/configs.md b/docs/my-website/docs/proxy/configs.md index df22a29e3..7876c9dec 100644 --- a/docs/my-website/docs/proxy/configs.md +++ b/docs/my-website/docs/proxy/configs.md @@ -2,7 +2,7 @@ import Image from '@theme/IdealImage'; import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; -# Proxy Config.yaml +# Overview Set model list, `api_base`, `api_key`, `temperature` & proxy server settings (`master-key`) on the config.yaml. 
| Param Name | Description | @@ -357,77 +357,6 @@ curl --location 'http://0.0.0.0:4000/v1/model/info' \ --data '' ``` - -### Provider specific wildcard routing -**Proxy all models from a provider** - -Use this if you want to **proxy all models from a specific provider without defining them on the config.yaml** - -**Step 1** - define provider specific routing on config.yaml -```yaml -model_list: - # provider specific wildcard routing - - model_name: "anthropic/*" - litellm_params: - model: "anthropic/*" - api_key: os.environ/ANTHROPIC_API_KEY - - model_name: "groq/*" - litellm_params: - model: "groq/*" - api_key: os.environ/GROQ_API_KEY - - model_name: "fo::*:static::*" # all requests matching this pattern will be routed to this deployment, example: model="fo::hi::static::hi" will be routed to deployment: "openai/fo::*:static::*" - litellm_params: - model: "openai/fo::*:static::*" - api_key: os.environ/OPENAI_API_KEY -``` - -Step 2 - Run litellm proxy - -```shell -$ litellm --config /path/to/config.yaml -``` - -Step 3 Test it - -Test with `anthropic/` - all models with `anthropic/` prefix will get routed to `anthropic/*` -```shell -curl http://localhost:4000/v1/chat/completions \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer sk-1234" \ - -d '{ - "model": "anthropic/claude-3-sonnet-20240229", - "messages": [ - {"role": "user", "content": "Hello, Claude!"} - ] - }' -``` - -Test with `groq/` - all models with `groq/` prefix will get routed to `groq/*` -```shell -curl http://localhost:4000/v1/chat/completions \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer sk-1234" \ - -d '{ - "model": "groq/llama3-8b-8192", - "messages": [ - {"role": "user", "content": "Hello, Claude!"} - ] - }' -``` - -Test with `fo::*::static::*` - all requests matching this pattern will be routed to `openai/fo::*:static::*` -```shell -curl http://localhost:4000/v1/chat/completions \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer sk-1234" \ - -d '{ - "model": "fo::hi::static::hi", - "messages": [ - {"role": "user", "content": "Hello, Claude!"} - ] - }' -``` - ### Load Balancing :::info @@ -597,481 +526,6 @@ general_settings: database_connection_timeout: 60 # sets a 60s timeout for any connection call to the db ``` -## **All settings** - - -```yaml -environment_variables: {} - -model_list: - - model_name: string - litellm_params: {} - model_info: - id: string - mode: embedding - input_cost_per_token: 0 - output_cost_per_token: 0 - max_tokens: 2048 - base_model: gpt-4-1106-preview - additionalProp1: {} - -litellm_settings: - # Logging/Callback settings - success_callback: ["langfuse"] # list of success callbacks - failure_callback: ["sentry"] # list of failure callbacks - callbacks: ["otel"] # list of callbacks - runs on success and failure - service_callbacks: ["datadog", "prometheus"] # logs redis, postgres failures on datadog, prometheus - turn_off_message_logging: boolean # prevent the messages and responses from being logged to on your callbacks, but request metadata will still be logged. - redact_user_api_key_info: boolean # Redact information about the user api key (hashed token, user_id, team id, etc.), from logs. Currently supported for Langfuse, OpenTelemetry, Logfire, ArizeAI logging. 
- langfuse_default_tags: ["cache_hit", "cache_key", "proxy_base_url", "user_api_key_alias", "user_api_key_user_id", "user_api_key_user_email", "user_api_key_team_alias", "semantic-similarity", "proxy_base_url"] # default tags for Langfuse Logging - - # Networking settings - request_timeout: 10 # (int) llm requesttimeout in seconds. Raise Timeout error if call takes longer than 10s. Sets litellm.request_timeout - force_ipv4: boolean # If true, litellm will force ipv4 for all LLM requests. Some users have seen httpx ConnectionError when using ipv6 + Anthropic API - - set_verbose: boolean # sets litellm.set_verbose=True to view verbose debug logs. DO NOT LEAVE THIS ON IN PRODUCTION - json_logs: boolean # if true, logs will be in json format - - # Fallbacks, reliability - default_fallbacks: ["claude-opus"] # set default_fallbacks, in case a specific model group is misconfigured / bad. - content_policy_fallbacks: [{"gpt-3.5-turbo-small": ["claude-opus"]}] # fallbacks for ContentPolicyErrors - context_window_fallbacks: [{"gpt-3.5-turbo-small": ["gpt-3.5-turbo-large", "claude-opus"]}] # fallbacks for ContextWindowExceededErrors - - - - # Caching settings - cache: true - cache_params: # set cache params for redis - type: redis # type of cache to initialize - - # Optional - Redis Settings - host: "localhost" # The host address for the Redis cache. Required if type is "redis". - port: 6379 # The port number for the Redis cache. Required if type is "redis". - password: "your_password" # The password for the Redis cache. Required if type is "redis". - namespace: "litellm.caching.caching" # namespace for redis cache - - # Optional - Redis Cluster Settings - redis_startup_nodes: [{"host": "127.0.0.1", "port": "7001"}] - - # Optional - Redis Sentinel Settings - service_name: "mymaster" - sentinel_nodes: [["localhost", 26379]] - - # Optional - Qdrant Semantic Cache Settings - qdrant_semantic_cache_embedding_model: openai-embedding # the model should be defined on the model_list - qdrant_collection_name: test_collection - qdrant_quantization_config: binary - similarity_threshold: 0.8 # similarity threshold for semantic cache - - # Optional - S3 Cache Settings - s3_bucket_name: cache-bucket-litellm # AWS Bucket Name for S3 - s3_region_name: us-west-2 # AWS Region Name for S3 - s3_aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID # us os.environ/ to pass environment variables. 
This is AWS Access Key ID for S3 - s3_aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY # AWS Secret Access Key for S3 - s3_endpoint_url: https://s3.amazonaws.com # [OPTIONAL] S3 endpoint URL, if you want to use Backblaze/cloudflare s3 bucket - - # Common Cache settings - # Optional - Supported call types for caching - supported_call_types: ["acompletion", "atext_completion", "aembedding", "atranscription"] - # /chat/completions, /completions, /embeddings, /audio/transcriptions - mode: default_off # if default_off, you need to opt in to caching on a per call basis - ttl: 600 # ttl for caching - - -callback_settings: - otel: - message_logging: boolean # OTEL logging callback specific settings - -general_settings: - completion_model: string - disable_spend_logs: boolean # turn off writing each transaction to the db - disable_master_key_return: boolean # turn off returning master key on UI (checked on '/user/info' endpoint) - disable_retry_on_max_parallel_request_limit_error: boolean # turn off retries when max parallel request limit is reached - disable_reset_budget: boolean # turn off reset budget scheduled task - disable_adding_master_key_hash_to_db: boolean # turn off storing master key hash in db, for spend tracking - enable_jwt_auth: boolean # allow proxy admin to auth in via jwt tokens with 'litellm_proxy_admin' in claims - enforce_user_param: boolean # requires all openai endpoint requests to have a 'user' param - allowed_routes: ["route1", "route2"] # list of allowed proxy API routes - a user can access. (currently JWT-Auth only) - key_management_system: google_kms # either google_kms or azure_kms - master_key: string - - # Database Settings - database_url: string - database_connection_pool_limit: 0 # default 100 - database_connection_timeout: 0 # default 60s - allow_requests_on_db_unavailable: boolean # if true, will allow requests that can not connect to the DB to verify Virtual Key to still work - - custom_auth: string - max_parallel_requests: 0 # the max parallel requests allowed per deployment - global_max_parallel_requests: 0 # the max parallel requests allowed on the proxy all up - infer_model_from_keys: true - background_health_checks: true - health_check_interval: 300 - alerting: ["slack", "email"] - alerting_threshold: 0 - use_client_credentials_pass_through_routes: boolean # use client credentials for all pass through routes like "/vertex-ai", /bedrock/. When this is True Virtual Key auth will not be applied on these endpoints -``` - -### litellm_settings - Reference - -| Name | Type | Description | -|------|------|-------------| -| success_callback | array of strings | List of success callbacks. [Doc Proxy logging callbacks](logging), [Doc Metrics](prometheus) | -| failure_callback | array of strings | List of failure callbacks [Doc Proxy logging callbacks](logging), [Doc Metrics](prometheus) | -| callbacks | array of strings | List of callbacks - runs on success and failure [Doc Proxy logging callbacks](logging), [Doc Metrics](prometheus) | -| service_callbacks | array of strings | System health monitoring - Logs redis, postgres failures on specified services (e.g. 
datadog, prometheus) [Doc Metrics](prometheus) | -| turn_off_message_logging | boolean | If true, prevents messages and responses from being logged to callbacks, but request metadata will still be logged [Proxy Logging](logging) | -| modify_params | boolean | If true, allows modifying the parameters of the request before it is sent to the LLM provider | -| enable_preview_features | boolean | If true, enables preview features - e.g. Azure O1 Models with streaming support.| -| redact_user_api_key_info | boolean | If true, redacts information about the user api key from logs [Proxy Logging](logging#redacting-userapikeyinfo) | -| langfuse_default_tags | array of strings | Default tags for Langfuse Logging. Use this if you want to control which LiteLLM-specific fields are logged as tags by the LiteLLM proxy. By default LiteLLM Proxy logs no LiteLLM-specific fields as tags. [Further docs](./logging#litellm-specific-tags-on-langfuse---cache_hit-cache_key) | -| set_verbose | boolean | If true, sets litellm.set_verbose=True to view verbose debug logs. DO NOT LEAVE THIS ON IN PRODUCTION | -| json_logs | boolean | If true, logs will be in json format. If you need to store the logs as JSON, just set the `litellm.json_logs = True`. We currently just log the raw POST request from litellm as a JSON [Further docs](./debugging) | -| default_fallbacks | array of strings | List of fallback models to use if a specific model group is misconfigured / bad. [Further docs](./reliability#default-fallbacks) | -| request_timeout | integer | The timeout for requests in seconds. If not set, the default value is `6000 seconds`. [For reference OpenAI Python SDK defaults to `600 seconds`.](https://github.com/openai/openai-python/blob/main/src/openai/_constants.py) | -| force_ipv4 | boolean | If true, litellm will force ipv4 for all LLM requests. Some users have seen httpx ConnectionError when using ipv6 + Anthropic API | -| content_policy_fallbacks | array of objects | Fallbacks to use when a ContentPolicyViolationError is encountered. [Further docs](./reliability#content-policy-fallbacks) | -| context_window_fallbacks | array of objects | Fallbacks to use when a ContextWindowExceededError is encountered. [Further docs](./reliability#context-window-fallbacks) | -| cache | boolean | If true, enables caching. [Further docs](./caching) | -| cache_params | object | Parameters for the cache. [Further docs](./caching) | -| cache_params.type | string | The type of cache to initialize. Can be one of ["local", "redis", "redis-semantic", "s3", "disk", "qdrant-semantic"]. Defaults to "redis". [Furher docs](./caching) | -| cache_params.host | string | The host address for the Redis cache. Required if type is "redis". | -| cache_params.port | integer | The port number for the Redis cache. Required if type is "redis". | -| cache_params.password | string | The password for the Redis cache. Required if type is "redis". | -| cache_params.namespace | string | The namespace for the Redis cache. | -| cache_params.redis_startup_nodes | array of objects | Redis Cluster Settings. [Further docs](./caching) | -| cache_params.service_name | string | Redis Sentinel Settings. [Further docs](./caching) | -| cache_params.sentinel_nodes | array of arrays | Redis Sentinel Settings. [Further docs](./caching) | -| cache_params.ttl | integer | The time (in seconds) to store entries in cache. | -| cache_params.qdrant_semantic_cache_embedding_model | string | The embedding model to use for qdrant semantic cache. 
| -| cache_params.qdrant_collection_name | string | The name of the collection to use for qdrant semantic cache. | -| cache_params.qdrant_quantization_config | string | The quantization configuration for the qdrant semantic cache. | -| cache_params.similarity_threshold | float | The similarity threshold for the semantic cache. | -| cache_params.s3_bucket_name | string | The name of the S3 bucket to use for the semantic cache. | -| cache_params.s3_region_name | string | The region name for the S3 bucket. | -| cache_params.s3_aws_access_key_id | string | The AWS access key ID for the S3 bucket. | -| cache_params.s3_aws_secret_access_key | string | The AWS secret access key for the S3 bucket. | -| cache_params.s3_endpoint_url | string | Optional - The endpoint URL for the S3 bucket. | -| cache_params.supported_call_types | array of strings | The types of calls to cache. [Further docs](./caching) | -| cache_params.mode | string | The mode of the cache. [Further docs](./caching) | -| disable_end_user_cost_tracking | boolean | If true, turns off end user cost tracking on prometheus metrics + litellm spend logs table on proxy. | -| key_generation_settings | object | Restricts who can generate keys. [Further docs](./virtual_keys.md#restricting-key-generation) | - -### general_settings - Reference - -| Name | Type | Description | -|------|------|-------------| -| completion_model | string | The default model to use for completions when `model` is not specified in the request | -| disable_spend_logs | boolean | If true, turns off writing each transaction to the database | -| disable_master_key_return | boolean | If true, turns off returning master key on UI. (checked on '/user/info' endpoint) | -| disable_retry_on_max_parallel_request_limit_error | boolean | If true, turns off retries when max parallel request limit is reached | -| disable_reset_budget | boolean | If true, turns off reset budget scheduled task | -| disable_adding_master_key_hash_to_db | boolean | If true, turns off storing master key hash in db | -| enable_jwt_auth | boolean | allow proxy admin to auth in via jwt tokens with 'litellm_proxy_admin' in claims. [Doc on JWT Tokens](token_auth) | -| enforce_user_param | boolean | If true, requires all OpenAI endpoint requests to have a 'user' param. [Doc on call hooks](call_hooks)| -| allowed_routes | array of strings | List of allowed proxy API routes a user can access [Doc on controlling allowed routes](enterprise#control-available-public-private-routes)| -| key_management_system | string | Specifies the key management system. [Doc Secret Managers](../secret) | -| master_key | string | The master key for the proxy [Set up Virtual Keys](virtual_keys) | -| database_url | string | The URL for the database connection [Set up Virtual Keys](virtual_keys) | -| database_connection_pool_limit | integer | The limit for database connection pool [Setting DB Connection Pool limit](#configure-db-pool-limits--connection-timeouts) | -| database_connection_timeout | integer | The timeout for database connections in seconds [Setting DB Connection Pool limit, timeout](#configure-db-pool-limits--connection-timeouts) | -| allow_requests_on_db_unavailable | boolean | If true, allows requests to succeed even if DB is unreachable. 
**Only use this if running LiteLLM in your VPC** This will allow requests to work even when LiteLLM cannot connect to the DB to verify a Virtual Key | -| custom_auth | string | Write your own custom authentication logic [Doc Custom Auth](virtual_keys#custom-auth) | -| max_parallel_requests | integer | The max parallel requests allowed per deployment | -| global_max_parallel_requests | integer | The max parallel requests allowed on the proxy overall | -| infer_model_from_keys | boolean | If true, infers the model from the provided keys | -| background_health_checks | boolean | If true, enables background health checks. [Doc on health checks](health) | -| health_check_interval | integer | The interval for health checks in seconds [Doc on health checks](health) | -| alerting | array of strings | List of alerting methods [Doc on Slack Alerting](alerting) | -| alerting_threshold | integer | The threshold for triggering alerts [Doc on Slack Alerting](alerting) | -| use_client_credentials_pass_through_routes | boolean | If true, uses client credentials for all pass-through routes. [Doc on pass through routes](pass_through) | -| health_check_details | boolean | If false, hides health check details (e.g. remaining rate limit). [Doc on health checks](health) | -| public_routes | List[str] | (Enterprise Feature) Control list of public routes | -| alert_types | List[str] | Control list of alert types to send to slack (Doc on alert types)[./alerting.md] | -| enforced_params | List[str] | (Enterprise Feature) List of params that must be included in all requests to the proxy | -| enable_oauth2_auth | boolean | (Enterprise Feature) If true, enables oauth2.0 authentication | -| use_x_forwarded_for | str | If true, uses the X-Forwarded-For header to get the client IP address | -| service_account_settings | List[Dict[str, Any]] | Set `service_account_settings` if you want to create settings that only apply to service account keys (Doc on service accounts)[./service_accounts.md] | -| image_generation_model | str | The default model to use for image generation - ignores model set in request | -| store_model_in_db | boolean | If true, allows `/model/new` endpoint to store model information in db. Endpoint disabled by default. [Doc on `/model/new` endpoint](./model_management.md#create-a-new-model) | -| max_request_size_mb | int | The maximum size for requests in MB. Requests above this size will be rejected. | -| max_response_size_mb | int | The maximum size for responses in MB. LLM Responses above this size will not be sent. | -| proxy_budget_rescheduler_min_time | int | The minimum time (in seconds) to wait before checking db for budget resets. **Default is 597 seconds** | -| proxy_budget_rescheduler_max_time | int | The maximum time (in seconds) to wait before checking db for budget resets. **Default is 605 seconds** | -| proxy_batch_write_at | int | Time (in seconds) to wait before batch writing spend logs to the db. **Default is 10 seconds** | -| alerting_args | dict | Args for Slack Alerting [Doc on Slack Alerting](./alerting.md) | -| custom_key_generate | str | Custom function for key generation [Doc on custom key generation](./virtual_keys.md#custom--key-generate) | -| allowed_ips | List[str] | List of IPs allowed to access the proxy. If not set, all IPs are allowed. | -| embedding_model | str | The default model to use for embeddings - ignores model set in request | -| default_team_disabled | boolean | If true, users cannot create 'personal' keys (keys with no team_id). 
| -| alert_to_webhook_url | Dict[str] | [Specify a webhook url for each alert type.](./alerting.md#set-specific-slack-channels-per-alert-type) | -| key_management_settings | List[Dict[str, Any]] | Settings for key management system (e.g. AWS KMS, Azure Key Vault) [Doc on key management](../secret.md) | -| allow_user_auth | boolean | (Deprecated) old approach for user authentication. | -| user_api_key_cache_ttl | int | The time (in seconds) to cache user api keys in memory. | -| disable_prisma_schema_update | boolean | If true, turns off automatic schema updates to DB | -| litellm_key_header_name | str | If set, allows passing LiteLLM keys as a custom header. [Doc on custom headers](./virtual_keys.md#custom-headers) | -| moderation_model | str | The default model to use for moderation. | -| custom_sso | str | Path to a python file that implements custom SSO logic. [Doc on custom SSO](./custom_sso.md) | -| allow_client_side_credentials | boolean | If true, allows passing client side credentials to the proxy. (Useful when testing finetuning models) [Doc on client side credentials](./virtual_keys.md#client-side-credentials) | -| admin_only_routes | List[str] | (Enterprise Feature) List of routes that are only accessible to admin users. [Doc on admin only routes](./enterprise#control-available-public-private-routes) | -| use_azure_key_vault | boolean | If true, load keys from azure key vault | -| use_google_kms | boolean | If true, load keys from google kms | -| spend_report_frequency | str | Specify how often you want a Spend Report to be sent (e.g. "1d", "2d", "30d") [More on this](./alerting.md#spend-report-frequency) | -| ui_access_mode | Literal["admin_only"] | If set, restricts access to the UI to admin users only. [Docs](./ui.md#restrict-ui-access) | -| litellm_jwtauth | Dict[str, Any] | Settings for JWT authentication. [Docs](./token_auth.md) | -| litellm_license | str | The license key for the proxy. [Docs](../enterprise.md#how-does-deployment-with-enterprise-license-work) | -| oauth2_config_mappings | Dict[str, str] | Define the OAuth2 config mappings | -| pass_through_endpoints | List[Dict[str, Any]] | Define the pass through endpoints. [Docs](./pass_through) | -| enable_oauth2_proxy_auth | boolean | (Enterprise Feature) If true, enables oauth2.0 authentication | -| forward_openai_org_id | boolean | If true, forwards the OpenAI Organization ID to the backend LLM call (if it's OpenAI). | -| forward_client_headers_to_llm_api | boolean | If true, forwards the client headers (any `x-` headers) to the backend LLM call | - -### router_settings - Reference - -```yaml -router_settings: - routing_strategy: usage-based-routing-v2 # Literal["simple-shuffle", "least-busy", "usage-based-routing","latency-based-routing"], default="simple-shuffle" - redis_host: # string - redis_password: # string - redis_port: # string - enable_pre_call_check: true # bool - Before call is made check if a call is within model context window - allowed_fails: 3 # cooldown model if it fails > 1 call in a minute. 
- cooldown_time: 30 # (in seconds) how long to cooldown model if fails/min > allowed_fails - disable_cooldowns: True # bool - Disable cooldowns for all models - enable_tag_filtering: True # bool - Use tag based routing for requests - retry_policy: { # Dict[str, int]: retry policy for different types of exceptions - "AuthenticationErrorRetries": 3, - "TimeoutErrorRetries": 3, - "RateLimitErrorRetries": 3, - "ContentPolicyViolationErrorRetries": 4, - "InternalServerErrorRetries": 4 - } - allowed_fails_policy: { - "BadRequestErrorAllowedFails": 1000, # Allow 1000 BadRequestErrors before cooling down a deployment - "AuthenticationErrorAllowedFails": 10, # int - "TimeoutErrorAllowedFails": 12, # int - "RateLimitErrorAllowedFails": 10000, # int - "ContentPolicyViolationErrorAllowedFails": 15, # int - "InternalServerErrorAllowedFails": 20, # int - } - content_policy_fallbacks=[{"claude-2": ["my-fallback-model"]}] # List[Dict[str, List[str]]]: Fallback model for content policy violations - fallbacks=[{"claude-2": ["my-fallback-model"]}] # List[Dict[str, List[str]]]: Fallback model for all errors -``` - -| Name | Type | Description | -|------|------|-------------| -| routing_strategy | string | The strategy used for routing requests. Options: "simple-shuffle", "least-busy", "usage-based-routing", "latency-based-routing". Default is "simple-shuffle". [More information here](../routing) | -| redis_host | string | The host address for the Redis server. **Only set this if you have multiple instances of LiteLLM Proxy and want current tpm/rpm tracking to be shared across them** | -| redis_password | string | The password for the Redis server. **Only set this if you have multiple instances of LiteLLM Proxy and want current tpm/rpm tracking to be shared across them** | -| redis_port | string | The port number for the Redis server. **Only set this if you have multiple instances of LiteLLM Proxy and want current tpm/rpm tracking to be shared across them**| -| enable_pre_call_check | boolean | If true, checks if a call is within the model's context window before making the call. [More information here](reliability) | -| content_policy_fallbacks | array of objects | Specifies fallback models for content policy violations. [More information here](reliability) | -| fallbacks | array of objects | Specifies fallback models for all types of errors. [More information here](reliability) | -| enable_tag_filtering | boolean | If true, uses tag based routing for requests [Tag Based Routing](tag_routing) | -| cooldown_time | integer | The duration (in seconds) to cooldown a model if it exceeds the allowed failures. | -| disable_cooldowns | boolean | If true, disables cooldowns for all models. [More information here](reliability) | -| retry_policy | object | Specifies the number of retries for different types of exceptions. [More information here](reliability) | -| allowed_fails | integer | The number of failures allowed before cooling down a model. [More information here](reliability) | -| allowed_fails_policy | object | Specifies the number of allowed failures for different error types before cooling down a deployment. 
[More information here](reliability) | - - -### environment variables - Reference - -| Name | Description | -|------|-------------| -| ACTIONS_ID_TOKEN_REQUEST_TOKEN | Token for requesting ID in GitHub Actions -| ACTIONS_ID_TOKEN_REQUEST_URL | URL for requesting ID token in GitHub Actions -| AISPEND_ACCOUNT_ID | Account ID for AI Spend -| AISPEND_API_KEY | API Key for AI Spend -| ALLOWED_EMAIL_DOMAINS | List of email domains allowed for access -| ARIZE_API_KEY | API key for Arize platform integration -| ARIZE_SPACE_KEY | Space key for Arize platform -| ARGILLA_BATCH_SIZE | Batch size for Argilla logging -| ARGILLA_API_KEY | API key for Argilla platform -| ARGILLA_SAMPLING_RATE | Sampling rate for Argilla logging -| ARGILLA_DATASET_NAME | Dataset name for Argilla logging -| ARGILLA_BASE_URL | Base URL for Argilla service -| ATHINA_API_KEY | API key for Athina service -| AUTH_STRATEGY | Strategy used for authentication (e.g., OAuth, API key) -| AWS_ACCESS_KEY_ID | Access Key ID for AWS services -| AWS_PROFILE_NAME | AWS CLI profile name to be used -| AWS_REGION_NAME | Default AWS region for service interactions -| AWS_ROLE_NAME | Role name for AWS IAM usage -| AWS_SECRET_ACCESS_KEY | Secret Access Key for AWS services -| AWS_SESSION_NAME | Name for AWS session -| AWS_WEB_IDENTITY_TOKEN | Web identity token for AWS -| AZURE_API_VERSION | Version of the Azure API being used -| AZURE_AUTHORITY_HOST | Azure authority host URL -| AZURE_CLIENT_ID | Client ID for Azure services -| AZURE_CLIENT_SECRET | Client secret for Azure services -| AZURE_FEDERATED_TOKEN_FILE | File path to Azure federated token -| AZURE_KEY_VAULT_URI | URI for Azure Key Vault -| AZURE_TENANT_ID | Tenant ID for Azure Active Directory -| BERRISPEND_ACCOUNT_ID | Account ID for BerriSpend service -| BRAINTRUST_API_KEY | API key for Braintrust integration -| CIRCLE_OIDC_TOKEN | OpenID Connect token for CircleCI -| CIRCLE_OIDC_TOKEN_V2 | Version 2 of the OpenID Connect token for CircleCI -| CONFIG_FILE_PATH | File path for configuration file -| CUSTOM_TIKTOKEN_CACHE_DIR | Custom directory for Tiktoken cache -| DATABASE_HOST | Hostname for the database server -| DATABASE_NAME | Name of the database -| DATABASE_PASSWORD | Password for the database user -| DATABASE_PORT | Port number for database connection -| DATABASE_SCHEMA | Schema name used in the database -| DATABASE_URL | Connection URL for the database -| DATABASE_USER | Username for database connection -| DATABASE_USERNAME | Alias for database user -| DATABRICKS_API_BASE | Base URL for Databricks API -| DD_BASE_URL | Base URL for Datadog integration -| DATADOG_BASE_URL | (Alternative to DD_BASE_URL) Base URL for Datadog integration -| _DATADOG_BASE_URL | (Alternative to DD_BASE_URL) Base URL for Datadog integration -| DD_API_KEY | API key for Datadog integration -| DD_SITE | Site URL for Datadog (e.g., datadoghq.com) -| DD_SOURCE | Source identifier for Datadog logs -| DD_ENV | Environment identifier for Datadog logs. Only supported for `datadog_llm_observability` callback -| DEBUG_OTEL | Enable debug mode for OpenTelemetry -| DIRECT_URL | Direct URL for service endpoint -| DISABLE_ADMIN_UI | Toggle to disable the admin UI -| DISABLE_SCHEMA_UPDATE | Toggle to disable schema updates -| DOCS_DESCRIPTION | Description text for documentation pages -| DOCS_FILTERED | Flag indicating filtered documentation -| DOCS_TITLE | Title of the documentation pages -| DOCS_URL | The path to the Swagger API documentation. 
**By default this is "/"** -| EMAIL_SUPPORT_CONTACT | Support contact email address -| GCS_BUCKET_NAME | Name of the Google Cloud Storage bucket -| GCS_PATH_SERVICE_ACCOUNT | Path to the Google Cloud service account JSON file -| GCS_FLUSH_INTERVAL | Flush interval for GCS logging (in seconds). Specify how often you want a log to be sent to GCS. **Default is 20 seconds** -| GCS_BATCH_SIZE | Batch size for GCS logging. Specify after how many logs you want to flush to GCS. If `BATCH_SIZE` is set to 10, logs are flushed every 10 logs. **Default is 2048** -| GENERIC_AUTHORIZATION_ENDPOINT | Authorization endpoint for generic OAuth providers -| GENERIC_CLIENT_ID | Client ID for generic OAuth providers -| GENERIC_CLIENT_SECRET | Client secret for generic OAuth providers -| GENERIC_CLIENT_STATE | State parameter for generic client authentication -| GENERIC_INCLUDE_CLIENT_ID | Include client ID in requests for OAuth -| GENERIC_SCOPE | Scope settings for generic OAuth providers -| GENERIC_TOKEN_ENDPOINT | Token endpoint for generic OAuth providers -| GENERIC_USER_DISPLAY_NAME_ATTRIBUTE | Attribute for user's display name in generic auth -| GENERIC_USER_EMAIL_ATTRIBUTE | Attribute for user's email in generic auth -| GENERIC_USER_FIRST_NAME_ATTRIBUTE | Attribute for user's first name in generic auth -| GENERIC_USER_ID_ATTRIBUTE | Attribute for user ID in generic auth -| GENERIC_USER_LAST_NAME_ATTRIBUTE | Attribute for user's last name in generic auth -| GENERIC_USER_PROVIDER_ATTRIBUTE | Attribute specifying the user's provider -| GENERIC_USER_ROLE_ATTRIBUTE | Attribute specifying the user's role -| GENERIC_USERINFO_ENDPOINT | Endpoint to fetch user information in generic OAuth -| GALILEO_BASE_URL | Base URL for Galileo platform -| GALILEO_PASSWORD | Password for Galileo authentication -| GALILEO_PROJECT_ID | Project ID for Galileo usage -| GALILEO_USERNAME | Username for Galileo authentication -| GREENSCALE_API_KEY | API key for Greenscale service -| GREENSCALE_ENDPOINT | Endpoint URL for Greenscale service -| GOOGLE_APPLICATION_CREDENTIALS | Path to Google Cloud credentials JSON file -| GOOGLE_CLIENT_ID | Client ID for Google OAuth -| GOOGLE_CLIENT_SECRET | Client secret for Google OAuth -| GOOGLE_KMS_RESOURCE_NAME | Name of the resource in Google KMS -| HF_API_BASE | Base URL for Hugging Face API -| HELICONE_API_KEY | API key for Helicone service -| HUGGINGFACE_API_BASE | Base URL for Hugging Face API -| IAM_TOKEN_DB_AUTH | IAM token for database authentication -| JSON_LOGS | Enable JSON formatted logging -| JWT_AUDIENCE | Expected audience for JWT tokens -| JWT_PUBLIC_KEY_URL | URL to fetch public key for JWT verification -| LAGO_API_BASE | Base URL for Lago API -| LAGO_API_CHARGE_BY | Parameter to determine charge basis in Lago -| LAGO_API_EVENT_CODE | Event code for Lago API events -| LAGO_API_KEY | API key for accessing Lago services -| LANGFUSE_DEBUG | Toggle debug mode for Langfuse -| LANGFUSE_FLUSH_INTERVAL | Interval for flushing Langfuse logs -| LANGFUSE_HOST | Host URL for Langfuse service -| LANGFUSE_PUBLIC_KEY | Public key for Langfuse authentication -| LANGFUSE_RELEASE | Release version of Langfuse integration -| LANGFUSE_SECRET_KEY | Secret key for Langfuse authentication -| LANGSMITH_API_KEY | API key for Langsmith platform -| LANGSMITH_BASE_URL | Base URL for Langsmith service -| LANGSMITH_BATCH_SIZE | Batch size for operations in Langsmith -| LANGSMITH_DEFAULT_RUN_NAME | Default name for Langsmith run -| LANGSMITH_PROJECT | Project name for Langsmith integration -| 
LANGSMITH_SAMPLING_RATE | Sampling rate for Langsmith logging -| LANGTRACE_API_KEY | API key for Langtrace service -| LITERAL_API_KEY | API key for Literal integration -| LITERAL_API_URL | API URL for Literal service -| LITERAL_BATCH_SIZE | Batch size for Literal operations -| LITELLM_DONT_SHOW_FEEDBACK_BOX | Flag to hide feedback box in LiteLLM UI -| LITELLM_DROP_PARAMS | Parameters to drop in LiteLLM requests -| LITELLM_EMAIL | Email associated with LiteLLM account -| LITELLM_GLOBAL_MAX_PARALLEL_REQUEST_RETRIES | Maximum retries for parallel requests in LiteLLM -| LITELLM_GLOBAL_MAX_PARALLEL_REQUEST_RETRY_TIMEOUT | Timeout for retries of parallel requests in LiteLLM -| LITELLM_HOSTED_UI | URL of the hosted UI for LiteLLM -| LITELLM_LICENSE | License key for LiteLLM usage -| LITELLM_LOCAL_MODEL_COST_MAP | Local configuration for model cost mapping in LiteLLM -| LITELLM_LOG | Enable detailed logging for LiteLLM -| LITELLM_MODE | Operating mode for LiteLLM (e.g., production, development) -| LITELLM_SALT_KEY | Salt key for encryption in LiteLLM -| LITELLM_SECRET_AWS_KMS_LITELLM_LICENSE | AWS KMS encrypted license for LiteLLM -| LITELLM_TOKEN | Access token for LiteLLM integration -| LOGFIRE_TOKEN | Token for Logfire logging service -| MICROSOFT_CLIENT_ID | Client ID for Microsoft services -| MICROSOFT_CLIENT_SECRET | Client secret for Microsoft services -| MICROSOFT_TENANT | Tenant ID for Microsoft Azure -| NO_DOCS | Flag to disable documentation generation -| NO_PROXY | List of addresses to bypass proxy -| OAUTH_TOKEN_INFO_ENDPOINT | Endpoint for OAuth token info retrieval -| OPENAI_API_BASE | Base URL for OpenAI API -| OPENAI_API_KEY | API key for OpenAI services -| OPENAI_ORGANIZATION | Organization identifier for OpenAI -| OPENID_BASE_URL | Base URL for OpenID Connect services -| OPENID_CLIENT_ID | Client ID for OpenID Connect authentication -| OPENID_CLIENT_SECRET | Client secret for OpenID Connect authentication -| OPENMETER_API_ENDPOINT | API endpoint for OpenMeter integration -| OPENMETER_API_KEY | API key for OpenMeter services -| OPENMETER_EVENT_TYPE | Type of events sent to OpenMeter -| OTEL_ENDPOINT | OpenTelemetry endpoint for traces -| OTEL_ENVIRONMENT_NAME | Environment name for OpenTelemetry -| OTEL_EXPORTER | Exporter type for OpenTelemetry -| OTEL_HEADERS | Headers for OpenTelemetry requests -| OTEL_SERVICE_NAME | Service name identifier for OpenTelemetry -| OTEL_TRACER_NAME | Tracer name for OpenTelemetry tracing -| PREDIBASE_API_BASE | Base URL for Predibase API -| PRESIDIO_ANALYZER_API_BASE | Base URL for Presidio Analyzer service -| PRESIDIO_ANONYMIZER_API_BASE | Base URL for Presidio Anonymizer service -| PROMETHEUS_URL | URL for Prometheus service -| PROMPTLAYER_API_KEY | API key for PromptLayer integration -| PROXY_ADMIN_ID | Admin identifier for proxy server -| PROXY_BASE_URL | Base URL for proxy service -| PROXY_LOGOUT_URL | URL for logging out of the proxy service -| PROXY_MASTER_KEY | Master key for proxy authentication -| QDRANT_API_BASE | Base URL for Qdrant API -| QDRANT_API_KEY | API key for Qdrant service -| QDRANT_URL | Connection URL for Qdrant database -| REDIS_HOST | Hostname for Redis server -| REDIS_PASSWORD | Password for Redis service -| REDIS_PORT | Port number for Redis server -| REDOC_URL | The path to the Redoc Fast API documentation. 
**By default this is "/redoc"** -| SERVER_ROOT_PATH | Root path for the server application -| SET_VERBOSE | Flag to enable verbose logging -| SLACK_DAILY_REPORT_FREQUENCY | Frequency of daily Slack reports (e.g., daily, weekly) -| SLACK_WEBHOOK_URL | Webhook URL for Slack integration -| SMTP_HOST | Hostname for the SMTP server -| SMTP_PASSWORD | Password for SMTP authentication -| SMTP_PORT | Port number for SMTP server -| SMTP_SENDER_EMAIL | Email address used as the sender in SMTP transactions -| SMTP_SENDER_LOGO | Logo used in emails sent via SMTP -| SMTP_TLS | Flag to enable or disable TLS for SMTP connections -| SMTP_USERNAME | Username for SMTP authentication -| SPEND_LOGS_URL | URL for retrieving spend logs -| SSL_CERTIFICATE | Path to the SSL certificate file -| SSL_VERIFY | Flag to enable or disable SSL certificate verification -| SUPABASE_KEY | API key for Supabase service -| SUPABASE_URL | Base URL for Supabase instance -| TEST_EMAIL_ADDRESS | Email address used for testing purposes -| UI_LOGO_PATH | Path to the logo image used in the UI -| UI_PASSWORD | Password for accessing the UI -| UI_USERNAME | Username for accessing the UI -| UPSTREAM_LANGFUSE_DEBUG | Flag to enable debugging for upstream Langfuse -| UPSTREAM_LANGFUSE_HOST | Host URL for upstream Langfuse service -| UPSTREAM_LANGFUSE_PUBLIC_KEY | Public key for upstream Langfuse authentication -| UPSTREAM_LANGFUSE_RELEASE | Release version identifier for upstream Langfuse -| UPSTREAM_LANGFUSE_SECRET_KEY | Secret key for upstream Langfuse authentication -| USE_AWS_KMS | Flag to enable AWS Key Management Service for encryption -| WEBHOOK_URL | URL for receiving webhooks from external services ## Extras diff --git a/docs/my-website/docs/proxy/db_info.md b/docs/my-website/docs/proxy/db_info.md index 6e6a48bd1..8429f6360 100644 --- a/docs/my-website/docs/proxy/db_info.md +++ b/docs/my-website/docs/proxy/db_info.md @@ -50,18 +50,22 @@ You can see the full DB Schema [here](https://github.com/BerriAI/litellm/blob/ma | LiteLLM_ErrorLogs | Captures failed requests and errors. Stores exception details and request information. Helps with debugging and monitoring. | **Medium - on errors only** | | LiteLLM_AuditLog | Tracks changes to system configuration. Records who made changes and what was modified. Maintains history of updates to teams, users, and models. | **Off by default**, **High - when enabled** | -## How to Disable `LiteLLM_SpendLogs` +## Disable `LiteLLM_SpendLogs` & `LiteLLM_ErrorLogs` -You can disable spend_logs by setting `disable_spend_logs` to `True` on the `general_settings` section of your proxy_config.yaml file. +You can disable spend_logs and error_logs by setting `disable_spend_logs` and `disable_error_logs` to `True` on the `general_settings` section of your proxy_config.yaml file. ```yaml general_settings: - disable_spend_logs: True + disable_spend_logs: True # Disable writing spend logs to DB + disable_error_logs: True # Disable writing error logs to DB ``` +### What is the impact of disabling these logs? -### What is the impact of disabling `LiteLLM_SpendLogs`? 
-
+When disabling spend logs (`disable_spend_logs: True`):
- You **will not** be able to view Usage on the LiteLLM UI
- You **will** continue seeing cost metrics on s3, Prometheus, Langfuse (any other Logging integration you are using)
+
+When disabling error logs (`disable_error_logs: True`):
+- You **will not** be able to view Errors on the LiteLLM UI
+- You **will** continue seeing error logs in your application logs and any other logging integrations you are using

diff --git a/docs/my-website/docs/proxy/prod.md b/docs/my-website/docs/proxy/prod.md
index 32a6fceee..9dacedaab 100644
--- a/docs/my-website/docs/proxy/prod.md
+++ b/docs/my-website/docs/proxy/prod.md
@@ -23,6 +23,7 @@ general_settings:
 # OPTIONAL Best Practices
 disable_spend_logs: True # turn off writing each transaction to the db. We recommend doing this if you don't need to see Usage on the LiteLLM UI and are tracking metrics via Prometheus
+ disable_error_logs: True # turn off writing LLM Exceptions to DB
 allow_requests_on_db_unavailable: True # Only USE when running LiteLLM on your VPC. Allow requests to still be processed even if the DB is unavailable. We recommend doing this if you're running LiteLLM on a VPC that cannot be accessed from the public internet.

litellm_settings:
@@ -102,17 +103,22 @@ general_settings:
 allow_requests_on_db_unavailable: True
```

-## 6. Disable spend_logs if you're not using the LiteLLM UI
+## 6. Disable spend_logs & error_logs if not using the LiteLLM UI

-By default LiteLLM will write every request to the `LiteLLM_SpendLogs` table. This is used for viewing Usage on the LiteLLM UI.
+By default, LiteLLM writes several types of logs to the database:
+- Every LLM API request to the `LiteLLM_SpendLogs` table
+- LLM Exceptions to the `LiteLLM_ErrorLogs` table

-If you're not viewing Usage on the LiteLLM UI (most users use Prometheus when this is disabled), you can disable spend_logs by setting `disable_spend_logs` to `True`.
+If you're not viewing these logs on the LiteLLM UI (most users use Prometheus for monitoring), you can disable them by setting the following flags to `True`:

```yaml
general_settings:
-  disable_spend_logs: True
+  disable_spend_logs: True # Disable writing spend logs to DB
+  disable_error_logs: True # Disable writing error logs to DB
```

+[More information about what the Database is used for here](db_info)
+
## 7. Use Helm PreSync Hook for Database Migrations [BETA]

To ensure only one service manages database migrations, use our [Helm PreSync hook for Database Migrations](https://github.com/BerriAI/litellm/blob/main/deploy/charts/litellm-helm/templates/migrations-job.yaml). This ensures migrations are handled during `helm upgrade` or `helm install`, while LiteLLM pods explicitly disable migrations.

diff --git a/docs/my-website/docs/proxy/prometheus.md b/docs/my-website/docs/proxy/prometheus.md
index 58dc3dae3..f19101b36 100644
--- a/docs/my-website/docs/proxy/prometheus.md
+++ b/docs/my-website/docs/proxy/prometheus.md
@@ -192,3 +192,13 @@ Here is a screenshot of the metrics you can monitor with the LiteLLM Grafana Dashboard.
|----------------------|--------------------------------------|
| `litellm_llm_api_failed_requests_metric` | **deprecated** use `litellm_proxy_failed_requests_metric` |
| `litellm_requests_metric` | **deprecated** use `litellm_proxy_total_requests_metric` |
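+
+To see these counters in practice, you can scrape the proxy's Prometheus endpoint directly. A quick sketch, assuming the `prometheus` callback is enabled and the proxy runs locally on port 4000:
+
+```shell
+curl -s http://0.0.0.0:4000/metrics | grep litellm_proxy_total_requests
+```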
+
+
+## FAQ
+
+### What are `_created` vs. `_total` metrics?
+
+- `_created` metrics record the timestamp at which a metric was created (e.g., when the proxy starts)
+- `_total` metrics are cumulative counters, incremented on each request
+
+Use the `_total` metrics for counting and rate calculations.
\ No newline at end of file
diff --git a/docs/my-website/docs/router_architecture.md b/docs/my-website/docs/router_architecture.md
new file mode 100644
index 000000000..13e9e411c
--- /dev/null
+++ b/docs/my-website/docs/router_architecture.md
@@ -0,0 +1,24 @@
+import Image from '@theme/IdealImage';
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
+# Router Architecture (Fallbacks / Retries)
+
+## High Level architecture
+
+
+
+### Request Flow
+
+1. **User Sends Request**: The process begins when a user sends a request to the LiteLLM Router endpoint. All unified endpoints (`.completion`, `.embeddings`, etc.) are supported by LiteLLM Router.
+
+2. **function_with_fallbacks**: The request first enters the `function_with_fallbacks` function, which wraps it in a try-except block so that fallbacks can be triggered if an exception is raised. The request is then passed on to the `function_with_retries` function.
+
+3. **function_with_retries**: The `function_with_retries` function wraps the request in its own try-except block and passes it to a base litellm unified function (`litellm.completion`, `litellm.embeddings`, etc.) to handle the LLM API call. If the call fails, it retries on an available deployment within the model group.
+
+4. **litellm.completion**: The `litellm.completion` function is the base function that makes the actual request to the LLM API; `function_with_retries` calls it on each attempt.
+
+## Legend
+
+**model_group**: A group of LLM API deployments that share the same `model_name` and can be load balanced across.
\ No newline at end of file
diff --git a/docs/my-website/docs/routing.md b/docs/my-website/docs/routing.md
index 702cafa7f..87fad7437 100644
--- a/docs/my-website/docs/routing.md
+++ b/docs/my-website/docs/routing.md
@@ -1891,3 +1891,22 @@ router = Router(
 debug_level="DEBUG" # defaults to INFO
)
```
+
+## Router General Settings
+
+### Usage
+
+```python
+from litellm import Router
+from litellm.types.router import RouterGeneralSettings # assumed import path - may differ across versions
+
+router = Router(model_list=..., router_general_settings=RouterGeneralSettings(async_only_mode=True))
+```
+
+### Spec
+```python
+class RouterGeneralSettings(BaseModel):
+    async_only_mode: bool = Field(
+        default=False
+    )  # initialize only async clients - useful for reducing memory utilization
+    pass_through_all_models: bool = Field(
+        default=False
+    )  # if a model is not in the router's model_list, pass the request through to litellm.acompletion / litellm.aembedding
+```
diff --git a/docs/my-website/docs/text_completion.md b/docs/my-website/docs/text_completion.md
new file mode 100644
index 000000000..8be40dfdc
--- /dev/null
+++ b/docs/my-website/docs/text_completion.md
@@ -0,0 +1,174 @@
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
+# Text Completion
+
+### Usage
+
+
+
+```python
+from litellm import text_completion
+
+response = text_completion(
+    model="gpt-3.5-turbo-instruct",
+    prompt="Say this is a test",
+    max_tokens=7
+)
+```
+
+
+
+
+1. Define models on config.yaml
+
+```yaml
+model_list:
+  - model_name: gpt-3.5-turbo-instruct
+    litellm_params:
+      model: text-completion-openai/gpt-3.5-turbo-instruct # The `text-completion-openai/` prefix will call openai.completions.create
+      api_key: os.environ/OPENAI_API_KEY
+  - model_name: text-davinci-003
+    litellm_params:
+      model: text-completion-openai/text-davinci-003
+      api_key: os.environ/OPENAI_API_KEY
+```
+
+2. Start litellm proxy server
+
+```
+litellm --config config.yaml
+```
+
+
+
+
+```python
+from openai import OpenAI
+
+# set base_url to your proxy server
+# set api_key to send to proxy server
+client = OpenAI(api_key="", base_url="http://0.0.0.0:4000")
+
+response = client.completions.create(
+    model="gpt-3.5-turbo-instruct",
+    prompt="Say this is a test",
+    max_tokens=7
+)
+
+print(response)
+```
+
+
+
+
+```shell
+curl --location 'http://0.0.0.0:4000/completions' \
+    --header 'Content-Type: application/json' \
+    --header 'Authorization: Bearer sk-1234' \
+    --data '{
+        "model": "gpt-3.5-turbo-instruct",
+        "prompt": "Say this is a test",
+        "max_tokens": 7
+    }'
+```
+
+
+
+
+
+
+## Input Params
+
+LiteLLM accepts and translates the [OpenAI Text Completion params](https://platform.openai.com/docs/api-reference/completions) across all supported providers.
+
+### Required Fields
+
+- `model`: *string* - ID of the model to use
+- `prompt`: *string or array* - The prompt(s) to generate completions for
+
+### Optional Fields
+
+- `best_of`: *integer* - Generates best_of completions server-side and returns the "best" one.
+- `echo`: *boolean* - Echo back the prompt in addition to the completion.
+- `frequency_penalty`: *number* - Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency.
+- `logit_bias`: *map* - Modify the likelihood of specified tokens appearing in the completion.
+- `logprobs`: *integer* - Include the log probabilities of the `logprobs` most likely tokens. Max value of 5.
+- `max_tokens`: *integer* - The maximum number of tokens to generate.
+- `n`: *integer* - How many completions to generate for each prompt.
+- `presence_penalty`: *number* - Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far.
+- `seed`: *integer* - If specified, the system will attempt to sample deterministically.
+- `stop`: *string or array* - Up to 4 sequences where the API will stop generating tokens.
+- `stream`: *boolean* - Whether to stream back partial progress. Defaults to false.
+- `suffix`: *string* - The suffix that comes after a completion of inserted text.
+- `temperature`: *number* - What sampling temperature to use, between 0 and 2.
+- `top_p`: *number* - An alternative to sampling with temperature, called nucleus sampling.
+- `user`: *string* - A unique identifier representing your end-user.
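+
+As a sketch of how several of these optional params combine in one call (values are illustrative):
+
+```python
+from litellm import text_completion
+
+response = text_completion(
+    model="gpt-3.5-turbo-instruct",
+    prompt="Say this is a test",
+    max_tokens=7,          # cap the completion length
+    temperature=0.2,       # low randomness
+    stop=["\n"],           # stop at the first newline
+    n=1,                   # single completion
+)
+print(response)
+```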
diff --git a/docs/my-website/docs/text_completion.md b/docs/my-website/docs/text_completion.md
new file mode 100644
index 000000000..8be40dfdc
--- /dev/null
+++ b/docs/my-website/docs/text_completion.md
@@ -0,0 +1,174 @@
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
+# Text Completion
+
+### Usage
+
+```python
+from litellm import text_completion
+
+response = text_completion(
+    model="gpt-3.5-turbo-instruct",
+    prompt="Say this is a test",
+    max_tokens=7
+)
+```
+
+1. Define models on config.yaml
+
+```yaml
+model_list:
+  - model_name: gpt-3.5-turbo-instruct
+    litellm_params:
+      model: text-completion-openai/gpt-3.5-turbo-instruct # The `text-completion-openai/` prefix will call openai.completions.create
+      api_key: os.environ/OPENAI_API_KEY
+  - model_name: text-davinci-003
+    litellm_params:
+      model: text-completion-openai/text-davinci-003
+      api_key: os.environ/OPENAI_API_KEY
+```
+
+2. Start litellm proxy server
+
+```
+litellm --config config.yaml
+```
+
+```python
+from openai import OpenAI
+
+# set base_url to your proxy server
+# set api_key to send to proxy server
+client = OpenAI(api_key="", base_url="http://0.0.0.0:4000")
+
+response = client.completions.create(
+    model="gpt-3.5-turbo-instruct",
+    prompt="Say this is a test",
+    max_tokens=7
+)
+
+print(response)
+```
+
+```shell
+curl --location 'http://0.0.0.0:4000/completions' \
+    --header 'Content-Type: application/json' \
+    --header 'Authorization: Bearer sk-1234' \
+    --data '{
+        "model": "gpt-3.5-turbo-instruct",
+        "prompt": "Say this is a test",
+        "max_tokens": 7
+    }'
+```
+
+## Input Params
+
+LiteLLM accepts and translates the [OpenAI Text Completion params](https://platform.openai.com/docs/api-reference/completions) across all supported providers.
+
+### Required Fields
+
+- `model`: *string* - ID of the model to use
+- `prompt`: *string or array* - The prompt(s) to generate completions for
+
+### Optional Fields
+
+- `best_of`: *integer* - Generates best_of completions server-side and returns the "best" one
+- `echo`: *boolean* - Echo back the prompt in addition to the completion
+- `frequency_penalty`: *number* - Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency.
+- `logit_bias`: *map* - Modify the likelihood of specified tokens appearing in the completion
+- `logprobs`: *integer* - Include the log probabilities on the `logprobs` most likely tokens. Max value of 5
+- `max_tokens`: *integer* - The maximum number of tokens to generate
+- `n`: *integer* - How many completions to generate for each prompt
+- `presence_penalty`: *number* - Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far.
+- `seed`: *integer* - If specified, the system will attempt to sample deterministically
+- `stop`: *string or array* - Up to 4 sequences where the API will stop generating tokens
+- `stream`: *boolean* - Whether to stream back partial progress. Defaults to false
+- `suffix`: *string* - The suffix that comes after a completion of inserted text
+- `temperature`: *number* - What sampling temperature to use, between 0 and 2
+- `top_p`: *number* - An alternative to sampling with temperature, called nucleus sampling
+- `user`: *string* - A unique identifier representing your end-user
+
+## Output Format
+Here's the exact JSON output format you can expect from completion calls:
+
+[**Follows OpenAI's output format**](https://platform.openai.com/docs/api-reference/completions/object)
+
+```python
+{
+  "id": "cmpl-uqkvlQyYK7bGYrRHQ0eXlWi7",
+  "object": "text_completion",
+  "created": 1589478378,
+  "model": "gpt-3.5-turbo-instruct",
+  "system_fingerprint": "fp_44709d6fcb",
+  "choices": [
+    {
+      "text": "\n\nThis is indeed a test",
+      "index": 0,
+      "logprobs": null,
+      "finish_reason": "length"
+    }
+  ],
+  "usage": {
+    "prompt_tokens": 5,
+    "completion_tokens": 7,
+    "total_tokens": 12
+  }
+}
+```
+
+```python
+{
+  "id": "cmpl-7iA7iJjj8V2zOkCGvWF2hAkDWBQZe",
+  "object": "text_completion",
+  "created": 1690759702,
+  "choices": [
+    {
+      "text": "This",
+      "index": 0,
+      "logprobs": null,
+      "finish_reason": null
+    }
+  ],
+  "model": "gpt-3.5-turbo-instruct",
+  "system_fingerprint": "fp_44709d6fcb"
+}
+```
+
+## **Supported Providers**
+
+| Provider | Link to Usage |
+|-------------|--------------------|
+| OpenAI | [Usage](../docs/providers/text_completion_openai) |
+| Azure OpenAI| [Usage](../docs/providers/azure) |
+
diff --git a/docs/my-website/docs/wildcard_routing.md b/docs/my-website/docs/wildcard_routing.md
new file mode 100644
index 000000000..80926d73e
--- /dev/null
+++ b/docs/my-website/docs/wildcard_routing.md
@@ -0,0 +1,140 @@
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
+# Provider-Specific Wildcard Routing
+
+**Proxy all models from a provider**
+
+Use this if you want to **proxy all models from a specific provider without defining them on the config.yaml**
+
+## Step 1. Define provider-specific routing
+
+```python
+import os
+
+from litellm import Router
+
+router = Router(
+    model_list=[
+        {
+            "model_name": "anthropic/*",
+            "litellm_params": {
+                "model": "anthropic/*",
+                "api_key": os.environ["ANTHROPIC_API_KEY"]
+            }
+        },
+        {
+            "model_name": "groq/*",
+            "litellm_params": {
+                "model": "groq/*",
+                "api_key": os.environ["GROQ_API_KEY"]
+            }
+        },
+        {
+            "model_name": "fo::*:static::*", # all requests matching this pattern will be routed to this deployment, example: model="fo::hi::static::hi" will be routed to deployment: "openai/fo::*:static::*"
+            "litellm_params": {
+                "model": "openai/fo::*:static::*",
+                "api_key": os.environ["OPENAI_API_KEY"]
+            }
+        }
+    ]
+)
+```
+
+**Step 1** - define provider-specific routing on config.yaml
+```yaml
+model_list:
+  # provider specific wildcard routing
+  - model_name: "anthropic/*"
+    litellm_params:
+      model: "anthropic/*"
+      api_key: os.environ/ANTHROPIC_API_KEY
+  - model_name: "groq/*"
+    litellm_params:
+      model: "groq/*"
+      api_key: os.environ/GROQ_API_KEY
+  - model_name: "fo::*:static::*" # all requests matching this pattern will be routed to this deployment, example: model="fo::hi::static::hi" will be routed to deployment: "openai/fo::*:static::*"
+    litellm_params:
+      model: "openai/fo::*:static::*"
+      api_key: os.environ/OPENAI_API_KEY
+```
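+
+How the wildcard patterns above behave, as a minimal illustrative sketch (this is not LiteLLM's internal matcher; `matches_wildcard` is a made-up helper): each `*` in a `model_name` matches any substring of the requested model.
+
+```python
+import re
+
+def matches_wildcard(pattern: str, model: str) -> bool:
+    # Escape regex metacharacters, then turn each "*" into ".*"
+    regex = re.escape(pattern).replace(r"\*", ".*")
+    return re.fullmatch(regex, model) is not None
+
+assert matches_wildcard("anthropic/*", "anthropic/claude-3-sonnet-20240229")
+assert matches_wildcard("fo::*:static::*", "fo::hi::static::hi")
+assert not matches_wildcard("groq/*", "anthropic/claude-3-sonnet-20240229")
+```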
+
+## [PROXY-Only] Step 2 - Run litellm proxy
+
+```shell
+$ litellm --config /path/to/config.yaml
+```
+
+## Step 3 - Test it
+
+```python
+from litellm import Router
+
+router = Router(model_list=...)
+
+# Test with `anthropic/` - all models with `anthropic/` prefix will get routed to `anthropic/*`
+resp = router.completion(model="anthropic/claude-3-sonnet-20240229", messages=[{"role": "user", "content": "Hello, Claude!"}])
+print(resp)
+
+# Test with `groq/` - all models with `groq/` prefix will get routed to `groq/*`
+resp = router.completion(model="groq/llama3-8b-8192", messages=[{"role": "user", "content": "Hello, Groq!"}])
+print(resp)
+
+# Test with `fo::*:static::*` - all requests matching this pattern will be routed to `openai/fo::*:static::*`
+resp = router.completion(model="fo::hi::static::hi", messages=[{"role": "user", "content": "Hello!"}])
+print(resp)
+```
+
+Test with `anthropic/` - all models with `anthropic/` prefix will get routed to `anthropic/*`
+```bash
+curl http://localhost:4000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer sk-1234" \
+  -d '{
+    "model": "anthropic/claude-3-sonnet-20240229",
+    "messages": [
+      {"role": "user", "content": "Hello, Claude!"}
+    ]
+  }'
+```
+
+Test with `groq/` - all models with `groq/` prefix will get routed to `groq/*`
+```shell
+curl http://localhost:4000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer sk-1234" \
+  -d '{
+    "model": "groq/llama3-8b-8192",
+    "messages": [
+      {"role": "user", "content": "Hello, Groq!"}
+    ]
+  }'
+```
+
+Test with `fo::*:static::*` - all requests matching this pattern will be routed to `openai/fo::*:static::*`
+```shell
+curl http://localhost:4000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer sk-1234" \
+  -d '{
+    "model": "fo::hi::static::hi",
+    "messages": [
+      {"role": "user", "content": "Hello!"}
+    ]
+  }'
+```
+
diff --git a/docs/my-website/img/router_architecture.png b/docs/my-website/img/router_architecture.png
new file mode 100644
index 000000000..195834185
Binary files /dev/null and b/docs/my-website/img/router_architecture.png differ
diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js
index cdcf3ba1b..e6a028d83 100644
--- a/docs/my-website/sidebars.js
+++ b/docs/my-website/sidebars.js
@@ -29,13 +29,17 @@ const sidebars = {
      },
      items: [
        "proxy/docker_quick_start",
+       {
+         "type": "category",
+         "label": "Config.yaml",
+         "items": ["proxy/configs", "proxy/config_management", "proxy/config_settings"]
+       },
        {
          type: "category",
          label: "Setup & Deployment",
          items: [
            "proxy/deploy",
            "proxy/prod",
-           "proxy/configs",
            "proxy/cli",
            "proxy/model_management",
            "proxy/health",
@@ -47,7 +51,7 @@ const sidebars = {
        {
          type: "category",
          label: "Architecture",
-         items: ["proxy/architecture", "proxy/db_info"],
+         items: ["proxy/architecture", "proxy/db_info", "router_architecture"],
        },
        {
          type: "link",
@@ -242,6 +246,7 @@ const sidebars = {
          "completion/usage",
        ],
      },
+     "text_completion",
      "embedding/supported_embedding",
      "image_generation",
      {
@@ -257,6 +262,7 @@ const sidebars = {
      "batches",
      "realtime",
      "fine_tuning",
+     "moderation",
      {
        type: "link",
        label: "Use LiteLLM Proxy with Vertex, Bedrock SDK",
@@ -273,7 +279,7 @@ const sidebars = {
        description: "Learn how to load balance, route, and set fallbacks for your LLM requests",
        slug: "/routing-load-balancing",
      },
-     items: ["routing", "scheduler", "proxy/load_balancing", "proxy/reliability", "proxy/tag_routing", "proxy/provider_budget_routing", "proxy/team_based_routing", "proxy/customer_routing"],
+     items: ["routing", "scheduler", "proxy/load_balancing", "proxy/reliability", "proxy/tag_routing",
"proxy/provider_budget_routing", "proxy/team_based_routing", "proxy/customer_routing", "wildcard_routing"], }, { type: "category", diff --git a/enterprise/utils.py b/enterprise/utils.py index f0af1d676..cc97661d7 100644 --- a/enterprise/utils.py +++ b/enterprise/utils.py @@ -2,7 +2,9 @@ from typing import Optional, List from litellm._logging import verbose_logger from litellm.proxy.proxy_server import PrismaClient, HTTPException +from litellm.llms.custom_httpx.http_handler import HTTPHandler import collections +import httpx from datetime import datetime @@ -114,7 +116,6 @@ async def ui_get_spend_by_tags( def _forecast_daily_cost(data: list): - import requests # type: ignore from datetime import datetime, timedelta if len(data) == 0: @@ -136,17 +137,17 @@ def _forecast_daily_cost(data: list): print("last entry date", last_entry_date) - # Assuming today_date is a datetime object - today_date = datetime.now() - # Calculate the last day of the month last_day_of_todays_month = datetime( today_date.year, today_date.month % 12 + 1, 1 ) - timedelta(days=1) + print("last day of todays month", last_day_of_todays_month) # Calculate the remaining days in the month remaining_days = (last_day_of_todays_month - last_entry_date).days + print("remaining days", remaining_days) + current_spend_this_month = 0 series = {} for entry in data: @@ -176,13 +177,19 @@ def _forecast_daily_cost(data: list): "Content-Type": "application/json", } - response = requests.post( - url="https://trend-api-production.up.railway.app/forecast", - json=payload, - headers=headers, - ) - # check the status code - response.raise_for_status() + client = HTTPHandler() + + try: + response = client.post( + url="https://trend-api-production.up.railway.app/forecast", + json=payload, + headers=headers, + ) + except httpx.HTTPStatusError as e: + raise HTTPException( + status_code=500, + detail={"error": f"Error getting forecast: {e.response.text}"}, + ) json_response = response.json() forecast_data = json_response["forecast"] @@ -206,13 +213,3 @@ def _forecast_daily_cost(data: list): f"Predicted Spend for { today_month } 2024, ${total_predicted_spend}" ) return {"response": response_data, "predicted_spend": predicted_spend} - - # print(f"Date: {entry['date']}, Spend: {entry['spend']}, Response: {response.text}") - - -# _forecast_daily_cost( -# [ -# {"date": "2022-01-01", "spend": 100}, - -# ] -# ) diff --git a/litellm/__init__.py b/litellm/__init__.py index 65b1b3465..43f91fe58 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -68,6 +68,7 @@ callbacks: List[Union[Callable, _custom_logger_compatible_callbacks_literal]] = langfuse_default_tags: Optional[List[str]] = None langsmith_batch_size: Optional[int] = None argilla_batch_size: Optional[int] = None +datadog_use_v1: Optional[bool] = False # if you want to use v1 datadog logged payload argilla_transformation_object: Optional[Dict[str, Any]] = None _async_input_callback: List[Callable] = ( [] diff --git a/litellm/_redis.py b/litellm/_redis.py index 2fba9d146..d905f1c9d 100644 --- a/litellm/_redis.py +++ b/litellm/_redis.py @@ -313,12 +313,13 @@ def get_redis_async_client(**env_overrides) -> async_redis.Redis: def get_redis_connection_pool(**env_overrides): redis_kwargs = _get_redis_client_logic(**env_overrides) + verbose_logger.debug("get_redis_connection_pool: redis_kwargs", redis_kwargs) if "url" in redis_kwargs and redis_kwargs["url"] is not None: return async_redis.BlockingConnectionPool.from_url( timeout=5, url=redis_kwargs["url"] ) connection_class = async_redis.Connection 
- if "ssl" in redis_kwargs and redis_kwargs["ssl"] is not None: + if "ssl" in redis_kwargs: connection_class = async_redis.SSLConnection redis_kwargs.pop("ssl", None) redis_kwargs["connection_class"] = connection_class diff --git a/litellm/integrations/datadog/datadog.py b/litellm/integrations/datadog/datadog.py index 40044ce9f..482c2bc10 100644 --- a/litellm/integrations/datadog/datadog.py +++ b/litellm/integrations/datadog/datadog.py @@ -32,9 +32,11 @@ from litellm.llms.custom_httpx.http_handler import ( get_async_httpx_client, httpxSpecialProvider, ) +from litellm.proxy._types import UserAPIKeyAuth +from litellm.types.integrations.datadog import * from litellm.types.services import ServiceLoggerPayload +from litellm.types.utils import StandardLoggingPayload -from .types import DD_ERRORS, DatadogPayload, DataDogStatus from .utils import make_json_serializable DD_MAX_BATCH_SIZE = 1000 # max number of logs DD API can accept @@ -106,20 +108,20 @@ class DataDogLogger(CustomBatchLogger): verbose_logger.debug( "Datadog: Logging - Enters logging function for model %s", kwargs ) - dd_payload = self.create_datadog_logging_payload( - kwargs=kwargs, - response_obj=response_obj, - start_time=start_time, - end_time=end_time, - ) + await self._log_async_event(kwargs, response_obj, start_time, end_time) - self.log_queue.append(dd_payload) + except Exception as e: + verbose_logger.exception( + f"Datadog Layer Error - {str(e)}\n{traceback.format_exc()}" + ) + pass + + async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time): + try: verbose_logger.debug( - f"Datadog, event added to queue. Will flush in {self.flush_interval} seconds..." + "Datadog: Logging - Enters logging function for model %s", kwargs ) - - if len(self.log_queue) >= self.batch_size: - await self.async_send_batch() + await self._log_async_event(kwargs, response_obj, start_time, end_time) except Exception as e: verbose_logger.exception( @@ -181,12 +183,20 @@ class DataDogLogger(CustomBatchLogger): verbose_logger.debug( "Datadog: Logging - Enters logging function for model %s", kwargs ) - dd_payload = self.create_datadog_logging_payload( - kwargs=kwargs, - response_obj=response_obj, - start_time=start_time, - end_time=end_time, - ) + if litellm.datadog_use_v1 is True: + dd_payload = self._create_v0_logging_payload( + kwargs=kwargs, + response_obj=response_obj, + start_time=start_time, + end_time=end_time, + ) + else: + dd_payload = self.create_datadog_logging_payload( + kwargs=kwargs, + response_obj=response_obj, + start_time=start_time, + end_time=end_time, + ) response = self.sync_client.post( url=self.intake_url, @@ -215,6 +225,22 @@ class DataDogLogger(CustomBatchLogger): pass pass + async def _log_async_event(self, kwargs, response_obj, start_time, end_time): + dd_payload = self.create_datadog_logging_payload( + kwargs=kwargs, + response_obj=response_obj, + start_time=start_time, + end_time=end_time, + ) + + self.log_queue.append(dd_payload) + verbose_logger.debug( + f"Datadog, event added to queue. Will flush in {self.flush_interval} seconds..." 
+ ) + + if len(self.log_queue) >= self.batch_size: + await self.async_send_batch() + def create_datadog_logging_payload( self, kwargs: Union[dict, Any], @@ -236,73 +262,29 @@ class DataDogLogger(CustomBatchLogger): """ import json - litellm_params = kwargs.get("litellm_params", {}) - metadata = ( - litellm_params.get("metadata", {}) or {} - ) # if litellm_params['metadata'] == None - messages = kwargs.get("messages") - optional_params = kwargs.get("optional_params", {}) - call_type = kwargs.get("call_type", "litellm.completion") - cache_hit = kwargs.get("cache_hit", False) - usage = response_obj["usage"] - id = response_obj.get("id", str(uuid.uuid4())) - usage = dict(usage) - try: - response_time = (end_time - start_time).total_seconds() * 1000 - except Exception: - response_time = None + standard_logging_object: Optional[StandardLoggingPayload] = kwargs.get( + "standard_logging_object", None + ) + if standard_logging_object is None: + raise ValueError("standard_logging_object not found in kwargs") - try: - response_obj = dict(response_obj) - except Exception: - response_obj = response_obj - - # Clean Metadata before logging - never log raw metadata - # the raw metadata can contain circular references which leads to infinite recursion - # we clean out all extra litellm metadata params before logging - clean_metadata = {} - if isinstance(metadata, dict): - for key, value in metadata.items(): - # clean litellm metadata before logging - if key in [ - "endpoint", - "caching_groups", - "previous_models", - ]: - continue - else: - clean_metadata[key] = value + status = DataDogStatus.INFO + if standard_logging_object.get("status") == "failure": + status = DataDogStatus.ERROR # Build the initial payload - payload = { - "id": id, - "call_type": call_type, - "cache_hit": cache_hit, - "start_time": start_time, - "end_time": end_time, - "response_time": response_time, - "model": kwargs.get("model", ""), - "user": kwargs.get("user", ""), - "model_parameters": optional_params, - "spend": kwargs.get("response_cost", 0), - "messages": messages, - "response": response_obj, - "usage": usage, - "metadata": clean_metadata, - } - - make_json_serializable(payload) - json_payload = json.dumps(payload) + make_json_serializable(standard_logging_object) + json_payload = json.dumps(standard_logging_object) verbose_logger.debug("Datadog: Logger - Logging payload = %s", json_payload) dd_payload = DatadogPayload( - ddsource=os.getenv("DD_SOURCE", "litellm"), - ddtags="", - hostname="", + ddsource=self._get_datadog_source(), + ddtags=self._get_datadog_tags(), + hostname=self._get_datadog_hostname(), message=json_payload, - service="litellm-server", - status=DataDogStatus.INFO, + service=self._get_datadog_service(), + status=status, ) return dd_payload @@ -382,3 +364,140 @@ class DataDogLogger(CustomBatchLogger): No user has asked for this so far, this might be spammy on datatdog. 
If need arises we can implement this """ return + + async def async_post_call_failure_hook( + self, + request_data: dict, + original_exception: Exception, + user_api_key_dict: UserAPIKeyAuth, + ): + """ + Handles Proxy Errors (not-related to LLM API), ex: Authentication Errors + """ + import json + + _exception_payload = DatadogProxyFailureHookJsonMessage( + exception=str(original_exception), + error_class=str(original_exception.__class__.__name__), + status_code=getattr(original_exception, "status_code", None), + traceback=traceback.format_exc(), + user_api_key_dict=user_api_key_dict.model_dump(), + ) + + json_payload = json.dumps(_exception_payload) + verbose_logger.debug("Datadog: Logger - Logging payload = %s", json_payload) + dd_payload = DatadogPayload( + ddsource=self._get_datadog_source(), + ddtags=self._get_datadog_tags(), + hostname=self._get_datadog_hostname(), + message=json_payload, + service=self._get_datadog_service(), + status=DataDogStatus.ERROR, + ) + + self.log_queue.append(dd_payload) + + def _create_v0_logging_payload( + self, + kwargs: Union[dict, Any], + response_obj: Any, + start_time: datetime.datetime, + end_time: datetime.datetime, + ) -> DatadogPayload: + """ + Note: This is our V1 Version of DataDog Logging Payload + + + (Not Recommended) If you want this to get logged set `litellm.datadog_use_v1 = True` + """ + import json + + litellm_params = kwargs.get("litellm_params", {}) + metadata = ( + litellm_params.get("metadata", {}) or {} + ) # if litellm_params['metadata'] == None + messages = kwargs.get("messages") + optional_params = kwargs.get("optional_params", {}) + call_type = kwargs.get("call_type", "litellm.completion") + cache_hit = kwargs.get("cache_hit", False) + usage = response_obj["usage"] + id = response_obj.get("id", str(uuid.uuid4())) + usage = dict(usage) + try: + response_time = (end_time - start_time).total_seconds() * 1000 + except Exception: + response_time = None + + try: + response_obj = dict(response_obj) + except Exception: + response_obj = response_obj + + # Clean Metadata before logging - never log raw metadata + # the raw metadata can contain circular references which leads to infinite recursion + # we clean out all extra litellm metadata params before logging + clean_metadata = {} + if isinstance(metadata, dict): + for key, value in metadata.items(): + # clean litellm metadata before logging + if key in [ + "endpoint", + "caching_groups", + "previous_models", + ]: + continue + else: + clean_metadata[key] = value + + # Build the initial payload + payload = { + "id": id, + "call_type": call_type, + "cache_hit": cache_hit, + "start_time": start_time, + "end_time": end_time, + "response_time": response_time, + "model": kwargs.get("model", ""), + "user": kwargs.get("user", ""), + "model_parameters": optional_params, + "spend": kwargs.get("response_cost", 0), + "messages": messages, + "response": response_obj, + "usage": usage, + "metadata": clean_metadata, + } + + make_json_serializable(payload) + json_payload = json.dumps(payload) + + verbose_logger.debug("Datadog: Logger - Logging payload = %s", json_payload) + + dd_payload = DatadogPayload( + ddsource=self._get_datadog_source(), + ddtags=self._get_datadog_tags(), + hostname=self._get_datadog_hostname(), + message=json_payload, + service=self._get_datadog_service(), + status=DataDogStatus.INFO, + ) + return dd_payload + + @staticmethod + def _get_datadog_tags(): + return f"env:{os.getenv('DD_ENV', 'unknown')},service:{os.getenv('DD_SERVICE', 'litellm')},version:{os.getenv('DD_VERSION', 
'unknown')}" + + @staticmethod + def _get_datadog_source(): + return os.getenv("DD_SOURCE", "litellm") + + @staticmethod + def _get_datadog_service(): + return os.getenv("DD_SERVICE", "litellm-server") + + @staticmethod + def _get_datadog_hostname(): + return "" + + @staticmethod + def _get_datadog_env(): + return os.getenv("DD_ENV", "unknown") diff --git a/litellm/llms/bedrock/chat/converse_transformation.py b/litellm/llms/bedrock/chat/converse_transformation.py index 6c08758dd..23ee97a47 100644 --- a/litellm/llms/bedrock/chat/converse_transformation.py +++ b/litellm/llms/bedrock/chat/converse_transformation.py @@ -458,7 +458,7 @@ class AmazonConverseConfig: """ Abbreviations of regions AWS Bedrock supports for cross region inference """ - return ["us", "eu"] + return ["us", "eu", "apac"] def _get_base_model(self, model: str) -> str: """ diff --git a/litellm/llms/custom_httpx/http_handler.py b/litellm/llms/custom_httpx/http_handler.py index f5c4f694d..f4d20f8fb 100644 --- a/litellm/llms/custom_httpx/http_handler.py +++ b/litellm/llms/custom_httpx/http_handler.py @@ -28,6 +28,62 @@ headers = { _DEFAULT_TIMEOUT = httpx.Timeout(timeout=5.0, connect=5.0) _DEFAULT_TTL_FOR_HTTPX_CLIENTS = 3600 # 1 hour, re-use the same httpx client for 1 hour +import re + + +def mask_sensitive_info(error_message): + # Find the start of the key parameter + if isinstance(error_message, str): + key_index = error_message.find("key=") + else: + return error_message + + # If key is found + if key_index != -1: + # Find the end of the key parameter (next & or end of string) + next_param = error_message.find("&", key_index) + + if next_param == -1: + # If no more parameters, mask until the end of the string + masked_message = error_message[: key_index + 4] + "[REDACTED_API_KEY]" + else: + # Replace the key with redacted value, keeping other parameters + masked_message = ( + error_message[: key_index + 4] + + "[REDACTED_API_KEY]" + + error_message[next_param:] + ) + + return masked_message + + return error_message + + +class MaskedHTTPStatusError(httpx.HTTPStatusError): + def __init__( + self, original_error, message: Optional[str] = None, text: Optional[str] = None + ): + # Create a new error with the masked URL + masked_url = mask_sensitive_info(str(original_error.request.url)) + # Create a new error that looks like the original, but with a masked URL + + super().__init__( + message=original_error.message, + request=httpx.Request( + method=original_error.request.method, + url=masked_url, + headers=original_error.request.headers, + content=original_error.request.content, + ), + response=httpx.Response( + status_code=original_error.response.status_code, + content=original_error.response.content, + headers=original_error.response.headers, + ), + ) + self.message = message + self.text = text + class AsyncHTTPHandler: def __init__( @@ -155,13 +211,16 @@ class AsyncHTTPHandler: headers=headers, ) except httpx.HTTPStatusError as e: - setattr(e, "status_code", e.response.status_code) + if stream is True: setattr(e, "message", await e.response.aread()) setattr(e, "text", await e.response.aread()) else: - setattr(e, "message", e.response.text) - setattr(e, "text", e.response.text) + setattr(e, "message", mask_sensitive_info(e.response.text)) + setattr(e, "text", mask_sensitive_info(e.response.text)) + + setattr(e, "status_code", e.response.status_code) + raise e except Exception as e: raise e @@ -399,11 +458,17 @@ class HTTPHandler: llm_provider="litellm-httpx-handler", ) except httpx.HTTPStatusError as e: - setattr(e, 
"status_code", e.response.status_code) + if stream is True: - setattr(e, "message", e.response.read()) + setattr(e, "message", mask_sensitive_info(e.response.read())) + setattr(e, "text", mask_sensitive_info(e.response.read())) else: - setattr(e, "message", e.response.text) + error_text = mask_sensitive_info(e.response.text) + setattr(e, "message", error_text) + setattr(e, "text", error_text) + + setattr(e, "status_code", e.response.status_code) + raise e except Exception as e: raise e diff --git a/litellm/llms/prompt_templates/factory.py b/litellm/llms/prompt_templates/factory.py index 45b7a6c5b..bfd35ca47 100644 --- a/litellm/llms/prompt_templates/factory.py +++ b/litellm/llms/prompt_templates/factory.py @@ -33,6 +33,7 @@ from litellm.types.llms.openai import ( ChatCompletionAssistantToolCall, ChatCompletionFunctionMessage, ChatCompletionImageObject, + ChatCompletionImageUrlObject, ChatCompletionTextObject, ChatCompletionToolCallFunctionChunk, ChatCompletionToolMessage, @@ -681,6 +682,27 @@ def construct_tool_use_system_prompt( return tool_use_system_prompt +def convert_generic_image_chunk_to_openai_image_obj( + image_chunk: GenericImageParsingChunk, +) -> str: + """ + Convert a generic image chunk to an OpenAI image object. + + Input: + GenericImageParsingChunk( + type="base64", + media_type="image/jpeg", + data="...", + ) + + Return: + "data:image/jpeg;base64,{base64_image}" + """ + return "data:{};{},{}".format( + image_chunk["media_type"], image_chunk["type"], image_chunk["data"] + ) + + def convert_to_anthropic_image_obj(openai_image_url: str) -> GenericImageParsingChunk: """ Input: @@ -706,6 +728,7 @@ def convert_to_anthropic_image_obj(openai_image_url: str) -> GenericImageParsing data=base64_data, ) except Exception as e: + traceback.print_exc() if "Error: Unable to fetch image from URL" in str(e): raise e raise Exception( @@ -1136,15 +1159,44 @@ def convert_to_anthropic_tool_result( ] } """ - content_str: str = "" + anthropic_content: Union[ + str, + List[Union[AnthropicMessagesToolResultContent, AnthropicMessagesImageParam]], + ] = "" if isinstance(message["content"], str): - content_str = message["content"] + anthropic_content = message["content"] elif isinstance(message["content"], List): content_list = message["content"] + anthropic_content_list: List[ + Union[AnthropicMessagesToolResultContent, AnthropicMessagesImageParam] + ] = [] for content in content_list: if content["type"] == "text": - content_str += content["text"] + anthropic_content_list.append( + AnthropicMessagesToolResultContent( + type="text", + text=content["text"], + ) + ) + elif content["type"] == "image_url": + if isinstance(content["image_url"], str): + image_chunk = convert_to_anthropic_image_obj(content["image_url"]) + else: + image_chunk = convert_to_anthropic_image_obj( + content["image_url"]["url"] + ) + anthropic_content_list.append( + AnthropicMessagesImageParam( + type="image", + source=AnthropicContentParamSource( + type="base64", + media_type=image_chunk["media_type"], + data=image_chunk["data"], + ), + ) + ) + anthropic_content = anthropic_content_list anthropic_tool_result: Optional[AnthropicMessagesToolResultParam] = None ## PROMPT CACHING CHECK ## cache_control = message.get("cache_control", None) @@ -1155,14 +1207,14 @@ def convert_to_anthropic_tool_result( # We can't determine from openai message format whether it's a successful or # error call result so default to the successful result template anthropic_tool_result = AnthropicMessagesToolResultParam( - type="tool_result", 
tool_use_id=tool_call_id, content=content_str + type="tool_result", tool_use_id=tool_call_id, content=anthropic_content ) if message["role"] == "function": function_message: ChatCompletionFunctionMessage = message tool_call_id = function_message.get("tool_call_id") or str(uuid.uuid4()) anthropic_tool_result = AnthropicMessagesToolResultParam( - type="tool_result", tool_use_id=tool_call_id, content=content_str + type="tool_result", tool_use_id=tool_call_id, content=anthropic_content ) if anthropic_tool_result is None: diff --git a/litellm/llms/vertex_ai_and_google_ai_studio/gemini/transformation.py b/litellm/llms/vertex_ai_and_google_ai_studio/gemini/transformation.py index f828d93c8..c9fe6e3f4 100644 --- a/litellm/llms/vertex_ai_and_google_ai_studio/gemini/transformation.py +++ b/litellm/llms/vertex_ai_and_google_ai_studio/gemini/transformation.py @@ -107,6 +107,10 @@ def _get_image_mime_type_from_url(url: str) -> Optional[str]: return "image/png" elif url.endswith(".webp"): return "image/webp" + elif url.endswith(".mp4"): + return "video/mp4" + elif url.endswith(".pdf"): + return "application/pdf" return None @@ -294,7 +298,12 @@ def _transform_request_body( optional_params = {k: v for k, v in optional_params.items() if k not in remove_keys} try: - content = _gemini_convert_messages_with_history(messages=messages) + if custom_llm_provider == "gemini": + content = litellm.GoogleAIStudioGeminiConfig._transform_messages( + messages=messages + ) + else: + content = litellm.VertexGeminiConfig._transform_messages(messages=messages) tools: Optional[Tools] = optional_params.pop("tools", None) tool_choice: Optional[ToolConfig] = optional_params.pop("tool_choice", None) safety_settings: Optional[List[SafetSettingsConfig]] = optional_params.pop( diff --git a/litellm/llms/vertex_ai_and_google_ai_studio/gemini/vertex_and_google_ai_studio_gemini.py b/litellm/llms/vertex_ai_and_google_ai_studio/gemini/vertex_and_google_ai_studio_gemini.py index f2fc599ed..4287ed1bc 100644 --- a/litellm/llms/vertex_ai_and_google_ai_studio/gemini/vertex_and_google_ai_studio_gemini.py +++ b/litellm/llms/vertex_ai_and_google_ai_studio/gemini/vertex_and_google_ai_studio_gemini.py @@ -35,7 +35,12 @@ from litellm.llms.custom_httpx.http_handler import ( HTTPHandler, get_async_httpx_client, ) +from litellm.llms.prompt_templates.factory import ( + convert_generic_image_chunk_to_openai_image_obj, + convert_to_anthropic_image_obj, +) from litellm.types.llms.openai import ( + AllMessageValues, ChatCompletionResponseMessage, ChatCompletionToolCallChunk, ChatCompletionToolCallFunctionChunk, @@ -78,6 +83,8 @@ from ..common_utils import ( ) from ..vertex_llm_base import VertexBase from .transformation import ( + _gemini_convert_messages_with_history, + _process_gemini_image, async_transform_request_body, set_headers, sync_transform_request_body, @@ -912,6 +919,10 @@ class VertexGeminiConfig: return model_response + @staticmethod + def _transform_messages(messages: List[AllMessageValues]) -> List[ContentType]: + return _gemini_convert_messages_with_history(messages=messages) + class GoogleAIStudioGeminiConfig( VertexGeminiConfig @@ -1015,6 +1026,32 @@ class GoogleAIStudioGeminiConfig( model, non_default_params, optional_params, drop_params ) + @staticmethod + def _transform_messages(messages: List[AllMessageValues]) -> List[ContentType]: + """ + Google AI Studio Gemini does not support image urls in messages. 
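+        Image urls served over https are fetched and inlined as base64 data urls
+        before running the standard Gemini message transform.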
+ """ + for message in messages: + _message_content = message.get("content") + if _message_content is not None and isinstance(_message_content, list): + _parts: List[PartType] = [] + for element in _message_content: + if element.get("type") == "image_url": + img_element = element + _image_url: Optional[str] = None + if isinstance(img_element.get("image_url"), dict): + _image_url = img_element["image_url"].get("url") # type: ignore + else: + _image_url = img_element.get("image_url") # type: ignore + if _image_url and "https://" in _image_url: + image_obj = convert_to_anthropic_image_obj(_image_url) + img_element["image_url"] = ( # type: ignore + convert_generic_image_chunk_to_openai_image_obj( + image_obj + ) + ) + return _gemini_convert_messages_with_history(messages=messages) + async def make_call( client: Optional[AsyncHTTPHandler], diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index a56472f7f..ac22871bc 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -2032,7 +2032,6 @@ "tool_use_system_prompt_tokens": 264, "supports_assistant_prefill": true, "supports_prompt_caching": true, - "supports_pdf_input": true, "supports_response_schema": true }, "claude-3-opus-20240229": { @@ -2098,6 +2097,7 @@ "supports_vision": true, "tool_use_system_prompt_tokens": 159, "supports_assistant_prefill": true, + "supports_pdf_input": true, "supports_prompt_caching": true, "supports_response_schema": true }, @@ -3383,6 +3383,8 @@ "supports_vision": true, "supports_response_schema": true, "supports_prompt_caching": true, + "tpm": 4000000, + "rpm": 2000, "source": "https://ai.google.dev/pricing" }, "gemini/gemini-1.5-flash-001": { @@ -3406,6 +3408,8 @@ "supports_vision": true, "supports_response_schema": true, "supports_prompt_caching": true, + "tpm": 4000000, + "rpm": 2000, "source": "https://ai.google.dev/pricing" }, "gemini/gemini-1.5-flash": { @@ -3428,6 +3432,8 @@ "supports_function_calling": true, "supports_vision": true, "supports_response_schema": true, + "tpm": 4000000, + "rpm": 2000, "source": "https://ai.google.dev/pricing" }, "gemini/gemini-1.5-flash-latest": { @@ -3450,6 +3456,32 @@ "supports_function_calling": true, "supports_vision": true, "supports_response_schema": true, + "tpm": 4000000, + "rpm": 2000, + "source": "https://ai.google.dev/pricing" + }, + "gemini/gemini-1.5-flash-8b": { + "max_tokens": 8192, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_images_per_prompt": 3000, + "max_videos_per_prompt": 10, + "max_video_length": 1, + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_pdf_size_mb": 30, + "input_cost_per_token": 0, + "input_cost_per_token_above_128k_tokens": 0, + "output_cost_per_token": 0, + "output_cost_per_token_above_128k_tokens": 0, + "litellm_provider": "gemini", + "mode": "chat", + "supports_system_messages": true, + "supports_function_calling": true, + "supports_vision": true, + "supports_response_schema": true, + "tpm": 4000000, + "rpm": 4000, "source": "https://ai.google.dev/pricing" }, "gemini/gemini-1.5-flash-8b-exp-0924": { @@ -3472,6 +3504,8 @@ "supports_function_calling": true, "supports_vision": true, "supports_response_schema": true, + "tpm": 4000000, + "rpm": 4000, "source": "https://ai.google.dev/pricing" }, "gemini/gemini-exp-1114": { @@ -3494,7 +3528,12 @@ "supports_function_calling": true, "supports_vision": true, "supports_response_schema": true, - "source": 
"https://ai.google.dev/pricing" + "tpm": 4000000, + "rpm": 1000, + "source": "https://ai.google.dev/pricing", + "metadata": { + "notes": "Rate limits not documented for gemini-exp-1114. Assuming same as gemini-1.5-pro." + } }, "gemini/gemini-1.5-flash-exp-0827": { "max_tokens": 8192, @@ -3516,6 +3555,8 @@ "supports_function_calling": true, "supports_vision": true, "supports_response_schema": true, + "tpm": 4000000, + "rpm": 2000, "source": "https://ai.google.dev/pricing" }, "gemini/gemini-1.5-flash-8b-exp-0827": { @@ -3537,6 +3578,9 @@ "supports_system_messages": true, "supports_function_calling": true, "supports_vision": true, + "supports_response_schema": true, + "tpm": 4000000, + "rpm": 4000, "source": "https://ai.google.dev/pricing" }, "gemini/gemini-pro": { @@ -3550,7 +3594,10 @@ "litellm_provider": "gemini", "mode": "chat", "supports_function_calling": true, - "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + "rpd": 30000, + "tpm": 120000, + "rpm": 360, + "source": "https://ai.google.dev/gemini-api/docs/models/gemini" }, "gemini/gemini-1.5-pro": { "max_tokens": 8192, @@ -3567,6 +3614,8 @@ "supports_vision": true, "supports_tool_choice": true, "supports_response_schema": true, + "tpm": 4000000, + "rpm": 1000, "source": "https://ai.google.dev/pricing" }, "gemini/gemini-1.5-pro-002": { @@ -3585,6 +3634,8 @@ "supports_tool_choice": true, "supports_response_schema": true, "supports_prompt_caching": true, + "tpm": 4000000, + "rpm": 1000, "source": "https://ai.google.dev/pricing" }, "gemini/gemini-1.5-pro-001": { @@ -3603,6 +3654,8 @@ "supports_tool_choice": true, "supports_response_schema": true, "supports_prompt_caching": true, + "tpm": 4000000, + "rpm": 1000, "source": "https://ai.google.dev/pricing" }, "gemini/gemini-1.5-pro-exp-0801": { @@ -3620,6 +3673,8 @@ "supports_vision": true, "supports_tool_choice": true, "supports_response_schema": true, + "tpm": 4000000, + "rpm": 1000, "source": "https://ai.google.dev/pricing" }, "gemini/gemini-1.5-pro-exp-0827": { @@ -3637,6 +3692,8 @@ "supports_vision": true, "supports_tool_choice": true, "supports_response_schema": true, + "tpm": 4000000, + "rpm": 1000, "source": "https://ai.google.dev/pricing" }, "gemini/gemini-1.5-pro-latest": { @@ -3654,6 +3711,8 @@ "supports_vision": true, "supports_tool_choice": true, "supports_response_schema": true, + "tpm": 4000000, + "rpm": 1000, "source": "https://ai.google.dev/pricing" }, "gemini/gemini-pro-vision": { @@ -3668,6 +3727,9 @@ "mode": "chat", "supports_function_calling": true, "supports_vision": true, + "rpd": 30000, + "tpm": 120000, + "rpm": 360, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, "gemini/gemini-gemma-2-27b-it": { diff --git a/litellm/proxy/_experimental/out/_next/static/chunks/131-3d2257b0ff5aadb2.js b/litellm/proxy/_experimental/out/_next/static/chunks/131-3d2257b0ff5aadb2.js deleted file mode 100644 index 51181e75a..000000000 --- a/litellm/proxy/_experimental/out/_next/static/chunks/131-3d2257b0ff5aadb2.js +++ /dev/null @@ -1,8 +0,0 @@ -"use strict";(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[131],{84174:function(e,t,n){n.d(t,{Z:function(){return s}});var a=n(14749),r=n(2265),i={icon:{tag:"svg",attrs:{viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"path",attrs:{d:"M832 64H296c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h496v688c0 4.4 3.6 8 8 8h56c4.4 0 8-3.6 8-8V96c0-17.7-14.3-32-32-32zM704 192H192c-17.7 0-32 14.3-32 32v530.7c0 8.5 3.4 16.6 9.4 22.6l173.3 173.3c2.2 2.2 
0:R.cancelText),okTextLocale:v,mergedOkCancel:y},h),k=i.useMemo(()=>w,(0,r.Z)(Object.values(w))),C=i.createElement(i.Fragment,null,i.createElement(N,null),i.createElement(_,null)),O=void 0!==e.title&&null!==e.title,x="".concat(s,"-body");return i.createElement("div",{className:"".concat(s,"-body-wrapper")},i.createElement("div",{className:m()(x,{["".concat(x,"-has-title")]:O})},S,i.createElement("div",{className:"".concat(s,"-paragraph")},O&&i.createElement("span",{className:"".concat(s,"-title")},e.title),i.createElement("div",{className:"".concat(s,"-content")},e.content))),void 0===b||"function"==typeof b?i.createElement(I,{value:k},i.createElement("div",{className:"".concat(s,"-btns")},"function"==typeof b?b(C,{OkBtn:_,CancelBtn:N}):C)):b,i.createElement(eN,{prefixCls:t}))}let ew=e=>{let{close:t,zIndex:n,afterClose:a,open:r,keyboard:o,centered:s,getContainer:l,maskStyle:c,direction:d,prefixCls:u,wrapClassName:p,rootPrefixCls:g,bodyStyle:E,closable:S=!1,closeIcon:y,modalRender:T,focusTriggerAfterClose:A,onConfirm:R,styles:I}=e,N="".concat(u,"-confirm"),_=e.width||416,v=e.style||{},w=void 0===e.mask||e.mask,k=void 0!==e.maskClosable&&e.maskClosable,C=m()(N,"".concat(N,"-").concat(e.type),{["".concat(N,"-rtl")]:"rtl"===d},e.className),[,O]=(0,h.ZP)(),x=i.useMemo(()=>void 0!==n?n:O.zIndexPopupBase+b.u6,[n,O]);return i.createElement(eR,{prefixCls:u,className:C,wrapClassName:m()({["".concat(N,"-centered")]:!!e.centered},p),onCancel:()=>{null==t||t({triggerCancel:!0}),null==R||R(!1)},open:r,title:"",footer:null,transitionName:(0,f.m)(g||"","zoom",e.transitionName),maskTransitionName:(0,f.m)(g||"","fade",e.maskTransitionName),mask:w,maskClosable:k,style:v,styles:Object.assign({body:E,mask:c},I),width:_,zIndex:x,afterClose:a,keyboard:o,centered:s,getContainer:l,closable:S,closeIcon:y,modalRender:T,focusTriggerAfterClose:A},i.createElement(ev,Object.assign({},e,{confirmPrefixCls:N})))};var ek=e=>{let{rootPrefixCls:t,iconPrefixCls:n,direction:a,theme:r}=e;return i.createElement(l.ZP,{prefixCls:t,iconPrefixCls:n,direction:a,theme:r},i.createElement(ew,Object.assign({},e)))},eC=[];let eO="",ex=e=>{var t,n;let{prefixCls:a,getContainer:r,direction:o}=e,l=(0,ei.A)(),c=(0,i.useContext)(s.E_),d=eO||c.getPrefixCls(),u=a||"".concat(d,"-modal"),p=r;return!1===p&&(p=void 0),i.createElement(ek,Object.assign({},e,{rootPrefixCls:d,prefixCls:u,iconPrefixCls:c.iconPrefixCls,theme:c.theme,direction:null!=o?o:c.direction,locale:null!==(n=null===(t=c.locale)||void 0===t?void 0:t.Modal)&&void 0!==n?n:l,getContainer:p}))};function eL(e){let t;let n=(0,l.w6)(),a=document.createDocumentFragment(),s=Object.assign(Object.assign({},e),{close:u,open:!0});function c(){for(var t=arguments.length,n=Array(t),i=0;ie&&e.triggerCancel);e.onCancel&&s&&e.onCancel.apply(e,[()=>{}].concat((0,r.Z)(n.slice(1))));for(let e=0;e{let t=n.getPrefixCls(void 0,eO),r=n.getIconPrefixCls(),s=n.getTheme(),c=i.createElement(ex,Object.assign({},e));(0,o.s)(i.createElement(l.ZP,{prefixCls:t,iconPrefixCls:r,theme:s},n.holderRender?n.holderRender(c):c),a)})}function u(){for(var t=arguments.length,n=Array(t),a=0;a{"function"==typeof e.afterClose&&e.afterClose(),c.apply(this,n)}})).visible&&delete s.visible,d(s)}return d(s),eC.push(u),{destroy:u,update:function(e){d(s="function"==typeof e?e(s):Object.assign(Object.assign({},s),e))}}}function eD(e){return Object.assign(Object.assign({},e),{type:"warning"})}function eP(e){return Object.assign(Object.assign({},e),{type:"info"})}function eM(e){return 
Object.assign(Object.assign({},e),{type:"success"})}function eF(e){return Object.assign(Object.assign({},e),{type:"error"})}function eU(e){return Object.assign(Object.assign({},e),{type:"confirm"})}var eB=n(21467),eG=function(e,t){var n={};for(var a in e)Object.prototype.hasOwnProperty.call(e,a)&&0>t.indexOf(a)&&(n[a]=e[a]);if(null!=e&&"function"==typeof Object.getOwnPropertySymbols)for(var r=0,a=Object.getOwnPropertySymbols(e);rt.indexOf(a[r])&&Object.prototype.propertyIsEnumerable.call(e,a[r])&&(n[a[r]]=e[a[r]]);return n},e$=(0,eB.i)(e=>{let{prefixCls:t,className:n,closeIcon:a,closable:r,type:o,title:l,children:c,footer:d}=e,u=eG(e,["prefixCls","className","closeIcon","closable","type","title","children","footer"]),{getPrefixCls:p}=i.useContext(s.E_),g=p(),b=t||p("modal"),f=(0,eT.Z)(g),[E,h,S]=ey(b,f),y="".concat(b,"-confirm"),T={};return T=o?{closable:null!=r&&r,title:"",footer:"",children:i.createElement(ev,Object.assign({},e,{prefixCls:b,confirmPrefixCls:y,rootPrefixCls:g,content:c}))}:{closable:null==r||r,title:l,footer:null!==d&&i.createElement(es,Object.assign({},e)),children:c},E(i.createElement(z,Object.assign({prefixCls:b,className:m()(h,"".concat(b,"-pure-panel"),o&&y,o&&"".concat(y,"-").concat(o),n,S,f)},u,{closeIcon:eo(b,a),closable:r},T)))}),eH=n(79474),ez=function(e,t){var n={};for(var a in e)Object.prototype.hasOwnProperty.call(e,a)&&0>t.indexOf(a)&&(n[a]=e[a]);if(null!=e&&"function"==typeof Object.getOwnPropertySymbols)for(var r=0,a=Object.getOwnPropertySymbols(e);rt.indexOf(a[r])&&Object.prototype.propertyIsEnumerable.call(e,a[r])&&(n[a[r]]=e[a[r]]);return n},ej=i.forwardRef((e,t)=>{var n,{afterClose:a,config:o}=e,l=ez(e,["afterClose","config"]);let[c,d]=i.useState(!0),[u,p]=i.useState(o),{direction:g,getPrefixCls:m}=i.useContext(s.E_),b=m("modal"),f=m(),h=function(){d(!1);for(var e=arguments.length,t=Array(e),n=0;ne&&e.triggerCancel);u.onCancel&&a&&u.onCancel.apply(u,[()=>{}].concat((0,r.Z)(t.slice(1))))};i.useImperativeHandle(t,()=>({destroy:h,update:e=>{p(t=>Object.assign(Object.assign({},t),e))}}));let S=null!==(n=u.okCancel)&&void 0!==n?n:"confirm"===u.type,[y]=(0,E.Z)("Modal",eH.Z.Modal);return i.createElement(ek,Object.assign({prefixCls:b,rootPrefixCls:f},u,{close:h,open:c,afterClose:()=>{var e;a(),null===(e=u.afterClose)||void 0===e||e.call(u)},okText:u.okText||(S?null==y?void 0:y.okText:null==y?void 0:y.justOkText),direction:u.direction||g,cancelText:u.cancelText||(null==y?void 0:y.cancelText)},l))});let eV=0,eW=i.memo(i.forwardRef((e,t)=>{let[n,a]=function(){let[e,t]=i.useState([]);return[e,i.useCallback(e=>(t(t=>[].concat((0,r.Z)(t),[e])),()=>{t(t=>t.filter(t=>t!==e))}),[])]}();return i.useImperativeHandle(t,()=>({patchElement:a}),[]),i.createElement(i.Fragment,null,n)}));function eq(e){return eL(eD(e))}eR.useModal=function(){let e=i.useRef(null),[t,n]=i.useState([]);i.useEffect(()=>{t.length&&((0,r.Z)(t).forEach(e=>{e()}),n([]))},[t]);let a=i.useCallback(t=>function(a){var o;let s,l;eV+=1;let c=i.createRef(),d=new Promise(e=>{s=e}),u=!1,p=i.createElement(ej,{key:"modal-".concat(eV),config:t(a),ref:c,afterClose:()=>{null==l||l()},isSilent:()=>u,onConfirm:e=>{s(e)}});return(l=null===(o=e.current)||void 0===o?void 0:o.patchElement(p))&&eC.push(l),{destroy:()=>{function e(){var e;null===(e=c.current)||void 0===e||e.destroy()}c.current?e():n(t=>[].concat((0,r.Z)(t),[e]))},update:e=>{function t(){var t;null===(t=c.current)||void 
0===t||t.update(e)}c.current?t():n(e=>[].concat((0,r.Z)(e),[t]))},then:e=>(u=!0,d.then(e))}},[]);return[i.useMemo(()=>({info:a(eP),success:a(eM),error:a(eF),warning:a(eD),confirm:a(eU)}),[]),i.createElement(eW,{key:"modal-holder",ref:e})]},eR.info=function(e){return eL(eP(e))},eR.success=function(e){return eL(eM(e))},eR.error=function(e){return eL(eF(e))},eR.warning=eq,eR.warn=eq,eR.confirm=function(e){return eL(eU(e))},eR.destroyAll=function(){for(;eC.length;){let e=eC.pop();e&&e()}},eR.config=function(e){let{rootPrefixCls:t}=e;eO=t},eR._InternalPanelDoNotUseOrYouWillBeFired=e$;var eY=eR},13703:function(e,t,n){n.d(t,{J$:function(){return s}});var a=n(8985),r=n(59353);let i=new a.E4("antFadeIn",{"0%":{opacity:0},"100%":{opacity:1}}),o=new a.E4("antFadeOut",{"0%":{opacity:1},"100%":{opacity:0}}),s=function(e){let t=arguments.length>1&&void 0!==arguments[1]&&arguments[1],{antCls:n}=e,a="".concat(n,"-fade"),s=t?"&":"";return[(0,r.R)(a,i,o,e.motionDurationMid,t),{["\n ".concat(s).concat(a,"-enter,\n ").concat(s).concat(a,"-appear\n ")]:{opacity:0,animationTimingFunction:"linear"},["".concat(s).concat(a,"-leave")]:{animationTimingFunction:"linear"}}]}},44056:function(e){e.exports=function(e,n){for(var a,r,i,o=e||"",s=n||"div",l={},c=0;c4&&m.slice(0,4)===o&&s.test(t)&&("-"===t.charAt(4)?b=o+(n=t.slice(5).replace(l,u)).charAt(0).toUpperCase()+n.slice(1):(g=(p=t).slice(4),t=l.test(g)?p:("-"!==(g=g.replace(c,d)).charAt(0)&&(g="-"+g),o+g)),f=r),new f(b,t))};var s=/^data[-\w.:]+$/i,l=/-[a-z]/g,c=/[A-Z]/g;function d(e){return"-"+e.toLowerCase()}function u(e){return e.charAt(1).toUpperCase()}},31872:function(e,t,n){var a=n(96130),r=n(64730),i=n(61861),o=n(46982),s=n(83671),l=n(53618);e.exports=a([i,r,o,s,l])},83671:function(e,t,n){var a=n(7667),r=n(13585),i=a.booleanish,o=a.number,s=a.spaceSeparated;e.exports=r({transform:function(e,t){return"role"===t?t:"aria-"+t.slice(4).toLowerCase()},properties:{ariaActiveDescendant:null,ariaAtomic:i,ariaAutoComplete:null,ariaBusy:i,ariaChecked:i,ariaColCount:o,ariaColIndex:o,ariaColSpan:o,ariaControls:s,ariaCurrent:null,ariaDescribedBy:s,ariaDetails:null,ariaDisabled:i,ariaDropEffect:s,ariaErrorMessage:null,ariaExpanded:i,ariaFlowTo:s,ariaGrabbed:i,ariaHasPopup:null,ariaHidden:i,ariaInvalid:null,ariaKeyShortcuts:null,ariaLabel:null,ariaLabelledBy:s,ariaLevel:o,ariaLive:null,ariaModal:i,ariaMultiLine:i,ariaMultiSelectable:i,ariaOrientation:null,ariaOwns:s,ariaPlaceholder:null,ariaPosInSet:o,ariaPressed:i,ariaReadOnly:i,ariaRelevant:null,ariaRequired:i,ariaRoleDescription:s,ariaRowCount:o,ariaRowIndex:o,ariaRowSpan:o,ariaSelected:i,ariaSetSize:o,ariaSort:null,ariaValueMax:o,ariaValueMin:o,ariaValueNow:o,ariaValueText:null,role:null}})},53618:function(e,t,n){var 
a=n(7667),r=n(13585),i=n(46640),o=a.boolean,s=a.overloadedBoolean,l=a.booleanish,c=a.number,d=a.spaceSeparated,u=a.commaSeparated;e.exports=r({space:"html",attributes:{acceptcharset:"accept-charset",classname:"class",htmlfor:"for",httpequiv:"http-equiv"},transform:i,mustUseProperty:["checked","multiple","muted","selected"],properties:{abbr:null,accept:u,acceptCharset:d,accessKey:d,action:null,allow:null,allowFullScreen:o,allowPaymentRequest:o,allowUserMedia:o,alt:null,as:null,async:o,autoCapitalize:null,autoComplete:d,autoFocus:o,autoPlay:o,capture:o,charSet:null,checked:o,cite:null,className:d,cols:c,colSpan:null,content:null,contentEditable:l,controls:o,controlsList:d,coords:c|u,crossOrigin:null,data:null,dateTime:null,decoding:null,default:o,defer:o,dir:null,dirName:null,disabled:o,download:s,draggable:l,encType:null,enterKeyHint:null,form:null,formAction:null,formEncType:null,formMethod:null,formNoValidate:o,formTarget:null,headers:d,height:c,hidden:o,high:c,href:null,hrefLang:null,htmlFor:d,httpEquiv:d,id:null,imageSizes:null,imageSrcSet:u,inputMode:null,integrity:null,is:null,isMap:o,itemId:null,itemProp:d,itemRef:d,itemScope:o,itemType:d,kind:null,label:null,lang:null,language:null,list:null,loading:null,loop:o,low:c,manifest:null,max:null,maxLength:c,media:null,method:null,min:null,minLength:c,multiple:o,muted:o,name:null,nonce:null,noModule:o,noValidate:o,onAbort:null,onAfterPrint:null,onAuxClick:null,onBeforePrint:null,onBeforeUnload:null,onBlur:null,onCancel:null,onCanPlay:null,onCanPlayThrough:null,onChange:null,onClick:null,onClose:null,onContextMenu:null,onCopy:null,onCueChange:null,onCut:null,onDblClick:null,onDrag:null,onDragEnd:null,onDragEnter:null,onDragExit:null,onDragLeave:null,onDragOver:null,onDragStart:null,onDrop:null,onDurationChange:null,onEmptied:null,onEnded:null,onError:null,onFocus:null,onFormData:null,onHashChange:null,onInput:null,onInvalid:null,onKeyDown:null,onKeyPress:null,onKeyUp:null,onLanguageChange:null,onLoad:null,onLoadedData:null,onLoadedMetadata:null,onLoadEnd:null,onLoadStart:null,onMessage:null,onMessageError:null,onMouseDown:null,onMouseEnter:null,onMouseLeave:null,onMouseMove:null,onMouseOut:null,onMouseOver:null,onMouseUp:null,onOffline:null,onOnline:null,onPageHide:null,onPageShow:null,onPaste:null,onPause:null,onPlay:null,onPlaying:null,onPopState:null,onProgress:null,onRateChange:null,onRejectionHandled:null,onReset:null,onResize:null,onScroll:null,onSecurityPolicyViolation:null,onSeeked:null,onSeeking:null,onSelect:null,onSlotChange:null,onStalled:null,onStorage:null,onSubmit:null,onSuspend:null,onTimeUpdate:null,onToggle:null,onUnhandledRejection:null,onUnload:null,onVolumeChange:null,onWaiting:null,onWheel:null,open:o,optimum:c,pattern:null,ping:d,placeholder:null,playsInline:o,poster:null,preload:null,readOnly:o,referrerPolicy:null,rel:d,required:o,reversed:o,rows:c,rowSpan:c,sandbox:d,scope:null,scoped:o,seamless:o,selected:o,shape:null,size:c,sizes:null,slot:null,span:c,spellCheck:l,src:null,srcDoc:null,srcLang:null,srcSet:u,start:c,step:null,style:null,tabIndex:c,target:null,title:null,translate:null,type:null,typeMustMatch:o,useMap:null,value:l,width:c,wrap:null,align:null,aLink:null,archive:d,axis:null,background:null,bgColor:null,border:c,borderColor:null,bottomMargin:c,cellPadding:null,cellSpacing:null,char:null,charOff:null,classId:null,clear:null,code:null,codeBase:null,codeType:null,color:null,compact:o,declare:o,event:null,face:null,frame:null,frameBorder:null,hSpace:c,leftMargin:c,link:null,longDesc:null,lowSrc:null,marginH
eight:c,marginWidth:c,noResize:o,noHref:o,noShade:o,noWrap:o,object:null,profile:null,prompt:null,rev:null,rightMargin:c,rules:null,scheme:null,scrolling:l,standby:null,summary:null,text:null,topMargin:c,valueType:null,version:null,vAlign:null,vLink:null,vSpace:c,allowTransparency:null,autoCorrect:null,autoSave:null,disablePictureInPicture:o,disableRemotePlayback:o,prefix:null,property:null,results:c,security:null,unselectable:null}})},46640:function(e,t,n){var a=n(25852);e.exports=function(e,t){return a(e,t.toLowerCase())}},25852:function(e){e.exports=function(e,t){return t in e?e[t]:t}},13585:function(e,t,n){var a=n(39900),r=n(94949),i=n(7478);e.exports=function(e){var t,n,o=e.space,s=e.mustUseProperty||[],l=e.attributes||{},c=e.properties,d=e.transform,u={},p={};for(t in c)n=new i(t,d(l,t),c[t],o),-1!==s.indexOf(t)&&(n.mustUseProperty=!0),u[t]=n,p[a(t)]=t,p[a(n.attribute)]=t;return new r(u,p,o)}},7478:function(e,t,n){var a=n(74108),r=n(7667);e.exports=s,s.prototype=new a,s.prototype.defined=!0;var i=["boolean","booleanish","overloadedBoolean","number","commaSeparated","spaceSeparated","commaOrSpaceSeparated"],o=i.length;function s(e,t,n,s){var l,c,d,u=-1;for(s&&(this.space=s),a.call(this,e,t);++u