From 2afd922f8c2871797e7d71e01cf7b0313b0147dd Mon Sep 17 00:00:00 2001 From: Nolan Tremelling <34580718+NolanTrem@users.noreply.github.com> Date: Tue, 15 Apr 2025 13:30:49 -0700 Subject: [PATCH 01/13] Fix case where only system messages are passed to Gemini (#9992) --- .../llms/vertex_ai/gemini/transformation.py | 30 ++++++-- ...test_vertex_and_google_ai_studio_gemini.py | 69 +++++++++++++++++++ 2 files changed, 95 insertions(+), 4 deletions(-) diff --git a/litellm/llms/vertex_ai/gemini/transformation.py b/litellm/llms/vertex_ai/gemini/transformation.py index 0afad13feb..d14dc9782d 100644 --- a/litellm/llms/vertex_ai/gemini/transformation.py +++ b/litellm/llms/vertex_ai/gemini/transformation.py @@ -331,7 +331,7 @@ def _gemini_convert_messages_with_history( # noqa: PLR0915 raise e -def _transform_request_body( +def _transform_request_body( # noqa: PLR0915 messages: List[AllMessageValues], model: str, optional_params: dict, @@ -342,13 +342,35 @@ def _transform_request_body( """ Common transformation logic across sync + async Gemini /generateContent calls. """ + # Duplicate system message as user message for Gemini + duplicate_system_as_user = optional_params.pop("duplicate_system_as_user_for_gemini", True) + + # Check if all messages are system messages + all_system_messages = all(message["role"] == "system" for message in messages) + # Separate system prompt from rest of message supports_system_message = get_supports_system_message( model=model, custom_llm_provider=custom_llm_provider ) - system_instructions, messages = _transform_system_message( - supports_system_message=supports_system_message, messages=messages - ) + + system_instructions = None + # If all messages are system messages, add a user message to the end + if (all_system_messages and supports_system_message and messages): + # Always create system instruction + system_content = messages[0].get("content", "") + system_part = PartType(text=system_content) # type: ignore + system_instructions = SystemInstructions(parts=[system_part]) + + # Only duplicate as user message if flag is set + if duplicate_system_as_user or litellm.modify_params: + user_message = cast(AllMessageValues, { + "role": "user", + "content": system_content + }) + messages = [user_message] + else: + messages = [] + # Checks for 'response_schema' support - if passed in if "response_schema" in optional_params: supports_response_schema = get_supports_response_schema( diff --git a/tests/litellm/llms/vertex_ai/gemini/test_vertex_and_google_ai_studio_gemini.py b/tests/litellm/llms/vertex_ai/gemini/test_vertex_and_google_ai_studio_gemini.py index d2169c299e..a5ad4f6a9a 100644 --- a/tests/litellm/llms/vertex_ai/gemini/test_vertex_and_google_ai_studio_gemini.py +++ b/tests/litellm/llms/vertex_ai/gemini/test_vertex_and_google_ai_studio_gemini.py @@ -8,6 +8,7 @@ from litellm import ModelResponse from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import ( VertexGeminiConfig, ) +from litellm.llms.vertex_ai.gemini.transformation import _transform_request_body from litellm.types.utils import ChoiceLogprobs @@ -62,3 +63,71 @@ def test_get_model_name_from_gemini_spec_model(): model = "gemini/ft-uuid-123" result = VertexGeminiConfig._get_model_name_from_gemini_spec_model(model) assert result == "ft-uuid-123" + +def test_system_message_conversion_gemini(): + """Test that system-only messages are properly handled for Gemini""" + # Case 1: Default behavior - duplicate system as user + messages = [{"role": "system", "content": "You are a helpful assistant"}] + + # 
Create mock objects for the test + model = "gemini-2.0-flash" + custom_llm_provider = "gemini" + optional_params = {} + litellm_params = {} + + result = _transform_request_body( + messages=messages, # type: ignore + model=model, + optional_params=optional_params, + custom_llm_provider=custom_llm_provider, + litellm_params=litellm_params, + cached_content=None + ) + + # Verify that contents has user message + assert len(result["contents"]) > 0 + assert result["contents"][0]["role"] == "user" # type: ignore + assert "system_instruction" in result + + # Case 2: Disable duplication + optional_params = {"duplicate_system_as_user_for_gemini": False} + + # Save original modify_params value + original_modify_params = litellm.modify_params + litellm.modify_params = False + + result_no_duplicate = _transform_request_body( + messages=messages.copy(), # type: ignore + model=model, + optional_params=optional_params, + custom_llm_provider=custom_llm_provider, + litellm_params={}, + cached_content=None + ) + + # Restore original modify_params value + litellm.modify_params = original_modify_params + + # With duplication disabled and modify_params False, + # we'd expect an empty contents field + # This might actually raise an exception in practice + assert "system_instruction" in result_no_duplicate + + # Case 3: With litellm.modify_params=True it should duplicate even with parameter set to False + litellm.modify_params = True + + result_with_modify_params = _transform_request_body( + messages=messages.copy(), # type: ignore + model=model, + optional_params={"duplicate_system_as_user_for_gemini": False}, + custom_llm_provider=custom_llm_provider, + litellm_params={}, + cached_content=None + ) + + # Restore original modify_params value + litellm.modify_params = original_modify_params + + # Verify that contents has user message due to modify_params=True + assert len(result_with_modify_params["contents"]) > 0 + assert result_with_modify_params["contents"][0]["role"] == "user" # type: ignore From 6b5f093087e773af32f397d911d2ed959ffa12d6 Mon Sep 17 00:00:00 2001 From: Krish Dholakia Date: Tue, 15 Apr 2025 13:34:03 -0700 Subject: [PATCH 02/13] Revert "Fix case where only system messages are passed to Gemini (#9992)" (#10027) This reverts commit 2afd922f8c2871797e7d71e01cf7b0313b0147dd. --- .../llms/vertex_ai/gemini/transformation.py | 30 ++------ ...test_vertex_and_google_ai_studio_gemini.py | 69 ------------------- 2 files changed, 4 insertions(+), 95 deletions(-) diff --git a/litellm/llms/vertex_ai/gemini/transformation.py b/litellm/llms/vertex_ai/gemini/transformation.py index d14dc9782d..0afad13feb 100644 --- a/litellm/llms/vertex_ai/gemini/transformation.py +++ b/litellm/llms/vertex_ai/gemini/transformation.py @@ -331,7 +331,7 @@ def _gemini_convert_messages_with_history( # noqa: PLR0915 raise e -def _transform_request_body( # noqa: PLR0915 +def _transform_request_body( messages: List[AllMessageValues], model: str, optional_params: dict, @@ -342,35 +342,13 @@ def _transform_request_body( # noqa: PLR0915 """ Common transformation logic across sync + async Gemini /generateContent calls. 
""" - # Duplicate system message as user message for Gemini - duplicate_system_as_user = optional_params.pop("duplicate_system_as_user_for_gemini", True) - - # Check if all messages are system messages - all_system_messages = all(message["role"] == "system" for message in messages) - # Separate system prompt from rest of message supports_system_message = get_supports_system_message( model=model, custom_llm_provider=custom_llm_provider ) - - system_instructions = None - # If all messages are system messages, add a user message to the end - if (all_system_messages and supports_system_message and messages): - # Always create system instruction - system_content = messages[0].get("content", "") - system_part = PartType(text=system_content) # type: ignore - system_instructions = SystemInstructions(parts=[system_part]) - - # Only duplicate as user message if flag is set - if duplicate_system_as_user or litellm.modify_params: - user_message = cast(AllMessageValues, { - "role": "user", - "content": system_content - }) - messages = [user_message] - else: - messages = [] - + system_instructions, messages = _transform_system_message( + supports_system_message=supports_system_message, messages=messages + ) # Checks for 'response_schema' support - if passed in if "response_schema" in optional_params: supports_response_schema = get_supports_response_schema( diff --git a/tests/litellm/llms/vertex_ai/gemini/test_vertex_and_google_ai_studio_gemini.py b/tests/litellm/llms/vertex_ai/gemini/test_vertex_and_google_ai_studio_gemini.py index a5ad4f6a9a..d2169c299e 100644 --- a/tests/litellm/llms/vertex_ai/gemini/test_vertex_and_google_ai_studio_gemini.py +++ b/tests/litellm/llms/vertex_ai/gemini/test_vertex_and_google_ai_studio_gemini.py @@ -8,7 +8,6 @@ from litellm import ModelResponse from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import ( VertexGeminiConfig, ) -from litellm.llms.vertex_ai.gemini.transformation import _transform_request_body from litellm.types.utils import ChoiceLogprobs @@ -63,71 +62,3 @@ def test_get_model_name_from_gemini_spec_model(): model = "gemini/ft-uuid-123" result = VertexGeminiConfig._get_model_name_from_gemini_spec_model(model) assert result == "ft-uuid-123" - -def test_system_message_conversion_gemini(): - """Test that system-only messages are properly handled for Gemini""" - # Case 1: Default behavior - duplicate system as user - messages = [{"role": "system", "content": "You are a helpful assistant"}] - - # Create mock objects for the test - model = "gemini-2.0-flash" - custom_llm_provider = "gemini" - optional_params = {} - litellm_params = {} - - result = _transform_request_body( - messages=messages, # type: ignore - model=model, - optional_params=optional_params, - custom_llm_provider=custom_llm_provider, - litellm_params=litellm_params, - cached_content=None - ) - - # Verify that contents has user message - assert len(result["contents"]) > 0 - assert result["contents"][0]["role"] == "user" # type: ignore - assert "system_instruction" in result - - # Case 2: Disable duplication - optional_params = {"duplicate_system_as_user_for_gemini": False} - - # Save original modify_params value - original_modify_params = litellm.modify_params - litellm.modify_params = False - - result_no_duplicate = _transform_request_body( - messages=messages.copy(), # type: ignore - model=model, - optional_params=optional_params, - custom_llm_provider=custom_llm_provider, - litellm_params={}, - cached_content=None - ) - - # Restore original modify_params value - 
litellm.modify_params = original_modify_params - - # With duplication disabled and modify_params False, - # we'd expect an empty contents field - # This might actually raise an exception in practice - assert "system_instruction" in result_no_duplicate - - # Case 3: With litellm.modify_params=True it should duplicate even with parameter set to False - litellm.modify_params = True - - result_with_modify_params = _transform_request_body( - messages=messages.copy(), # type: ignore - model=model, - optional_params={"duplicate_system_as_user_for_gemini": False}, - custom_llm_provider=custom_llm_provider, - litellm_params={}, - cached_content=None - ) - - # Restore original modify_params value - litellm.modify_params = original_modify_params - - # Verify that contents has user message due to modify_params=True - assert len(result_with_modify_params["contents"]) > 0 - assert result_with_modify_params["contents"][0]["role"] == "user" # type: ignore From 8424171c2ab38e0ef303b2a94aac8939cbaa17a5 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Tue, 15 Apr 2025 13:41:22 -0700 Subject: [PATCH 03/13] fix(config_settings.md): cleanup --- docs/my-website/docs/proxy/config_settings.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/my-website/docs/proxy/config_settings.md b/docs/my-website/docs/proxy/config_settings.md index 1637366ec1..bd8e2116c2 100644 --- a/docs/my-website/docs/proxy/config_settings.md +++ b/docs/my-website/docs/proxy/config_settings.md @@ -334,7 +334,6 @@ router_settings: | AZURE_STORAGE_TENANT_ID | The Application Tenant ID to use for Authentication to Azure Blob Storage logging | AZURE_STORAGE_CLIENT_ID | The Application Client ID to use for Authentication to Azure Blob Storage logging | AZURE_STORAGE_CLIENT_SECRET | The Application Client Secret to use for Authentication to Azure Blob Storage logging - | BERRISPEND_ACCOUNT_ID | Account ID for BerriSpend service | BRAINTRUST_API_KEY | API key for Braintrust integration | CIRCLE_OIDC_TOKEN | OpenID Connect token for CircleCI From ef80d25f160d55fbbcd3b29c28c8b10c70323dd3 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Tue, 15 Apr 2025 13:52:46 -0700 Subject: [PATCH 04/13] =?UTF-8?q?bump:=20version=201.66.1=20=E2=86=92=201.?= =?UTF-8?q?66.2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 92066d0105..256fe563eb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "1.66.1" +version = "1.66.2" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT" @@ -118,7 +118,7 @@ requires = ["poetry-core", "wheel"] build-backend = "poetry.core.masonry.api" [tool.commitizen] -version = "1.66.1" +version = "1.66.2" version_files = [ "pyproject.toml:^version" ] From 09df3815b8b384d2605aab66810a9ef2a3a5167d Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 15 Apr 2025 15:43:58 -0700 Subject: [PATCH 05/13] docs cache control injection points --- .../src/components/add_model/cache_control_settings.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ui/litellm-dashboard/src/components/add_model/cache_control_settings.tsx b/ui/litellm-dashboard/src/components/add_model/cache_control_settings.tsx index 5a81f87ea3..6bab202050 100644 --- a/ui/litellm-dashboard/src/components/add_model/cache_control_settings.tsx +++ 
b/ui/litellm-dashboard/src/components/add_model/cache_control_settings.tsx @@ -44,7 +44,7 @@ const CacheControlSettings: React.FC = ({ return ( <> Date: Wed, 16 Apr 2025 01:34:55 +0200 Subject: [PATCH 06/13] chore(docs): Update logging.md (#10006) Fixes a missing slash in OTEL_ENDPOINT example --- docs/my-website/docs/proxy/logging.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/my-website/docs/proxy/logging.md b/docs/my-website/docs/proxy/logging.md index e13a403634..c8731dd270 100644 --- a/docs/my-website/docs/proxy/logging.md +++ b/docs/my-website/docs/proxy/logging.md @@ -862,7 +862,7 @@ Add the following to your env ```shell OTEL_EXPORTER="otlp_http" -OTEL_ENDPOINT="http:/0.0.0.0:4317" +OTEL_ENDPOINT="http://0.0.0.0:4317" OTEL_HEADERS="x-honeycomb-team=" # Optional ``` @@ -2501,4 +2501,4 @@ litellm_settings: :::info `thresholds` are not required by default, but you can tune the values to your needs. Default values is `4` for all categories -::: --> \ No newline at end of file +::: --> From 81e77411073b01cab32a39ac16c5cd1981729369 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 15 Apr 2025 16:35:26 -0700 Subject: [PATCH 07/13] build(deps): bump @babel/runtime in /ui/litellm-dashboard (#10001) Bumps [@babel/runtime](https://github.com/babel/babel/tree/HEAD/packages/babel-runtime) from 7.23.9 to 7.27.0. - [Release notes](https://github.com/babel/babel/releases) - [Changelog](https://github.com/babel/babel/blob/main/CHANGELOG.md) - [Commits](https://github.com/babel/babel/commits/v7.27.0/packages/babel-runtime) --- updated-dependencies: - dependency-name: "@babel/runtime" dependency-version: 7.27.0 dependency-type: indirect ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- ui/litellm-dashboard/package-lock.json | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ui/litellm-dashboard/package-lock.json b/ui/litellm-dashboard/package-lock.json index 18c2eaeace..6d38a7d70b 100644 --- a/ui/litellm-dashboard/package-lock.json +++ b/ui/litellm-dashboard/package-lock.json @@ -133,9 +133,10 @@ } }, "node_modules/@babel/runtime": { - "version": "7.23.9", - "resolved": "https://registry.npmjs.org/@babel/runtime/-/runtime-7.23.9.tgz", - "integrity": "sha512-0CX6F+BI2s9dkUqr08KFrAIZgNFj75rdBU/DjCyYLIaV/quFjkk6T+EJ2LkZHyZTbEV4L5p97mNkUsHl2wLFAw==", + "version": "7.27.0", + "resolved": "https://registry.npmjs.org/@babel/runtime/-/runtime-7.27.0.tgz", + "integrity": "sha512-VtPOkrdPHZsKc/clNqyi9WUA8TINkZ4cGk63UUE3u4pmB2k+ZMQRDuIOagv8UVd6j7k0T3+RRIb7beKTebNbcw==", + "license": "MIT", "dependencies": { "regenerator-runtime": "^0.14.0" }, From 837a6948d83be839a2c28dc7d84a6f78cb86c054 Mon Sep 17 00:00:00 2001 From: Marc Abramowitz Date: Tue, 15 Apr 2025 17:31:18 -0700 Subject: [PATCH 08/13] Fix typo: Entrata -> Entra in code (#9922) * Fix typo: Entrata -> Entra * Fix a few more --- .../release_notes/v1.57.8-stable/index.md | 2 +- .../integrations/azure_storage/azure_storage.py | 4 ++-- litellm/llms/azure/common_utils.py | 8 ++++---- .../llms/azure/test_azure_common_utils.py | 16 ++++++++-------- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/docs/my-website/release_notes/v1.57.8-stable/index.md b/docs/my-website/release_notes/v1.57.8-stable/index.md index ea712f0f77..78fe13f2ed 100644 --- a/docs/my-website/release_notes/v1.57.8-stable/index.md +++ b/docs/my-website/release_notes/v1.57.8-stable/index.md @@ 
-38,7 +38,7 @@ hide_table_of_contents: false 2. OpenAI Moderations - `omni-moderation-latest` support. [Start Here](https://docs.litellm.ai/docs/moderation) 3. Azure O1 - fake streaming support. This ensures if a `stream=true` is passed, the response is streamed. [Start Here](https://docs.litellm.ai/docs/providers/azure) 4. Anthropic - non-whitespace char stop sequence handling - [PR](https://github.com/BerriAI/litellm/pull/7484) -5. Azure OpenAI - support Entra id username + password based auth. [Start Here](https://docs.litellm.ai/docs/providers/azure#entrata-id---use-tenant_id-client_id-client_secret) +5. Azure OpenAI - support Entra ID username + password based auth. [Start Here](https://docs.litellm.ai/docs/providers/azure#entra-id---use-tenant_id-client_id-client_secret) 6. LM Studio - embedding route support. [Start Here](https://docs.litellm.ai/docs/providers/lm-studio) 7. WatsonX - ZenAPIKeyAuth support. [Start Here](https://docs.litellm.ai/docs/providers/watsonx) diff --git a/litellm/integrations/azure_storage/azure_storage.py b/litellm/integrations/azure_storage/azure_storage.py index 27f5e0e112..24a3f5749a 100644 --- a/litellm/integrations/azure_storage/azure_storage.py +++ b/litellm/integrations/azure_storage/azure_storage.py @@ -8,7 +8,7 @@ from typing import List, Optional from litellm._logging import verbose_logger from litellm.constants import AZURE_STORAGE_MSFT_VERSION from litellm.integrations.custom_batch_logger import CustomBatchLogger -from litellm.llms.azure.common_utils import get_azure_ad_token_from_entrata_id +from litellm.llms.azure.common_utils import get_azure_ad_token_from_entra_id from litellm.llms.custom_httpx.http_handler import ( AsyncHTTPHandler, get_async_httpx_client, @@ -291,7 +291,7 @@ class AzureBlobStorageLogger(CustomBatchLogger): "Missing required environment variable: AZURE_STORAGE_CLIENT_SECRET" ) - token_provider = get_azure_ad_token_from_entrata_id( + token_provider = get_azure_ad_token_from_entra_id( tenant_id=tenant_id, client_id=client_id, client_secret=client_secret, diff --git a/litellm/llms/azure/common_utils.py b/litellm/llms/azure/common_utils.py index e190c8bfd7..012f47c851 100644 --- a/litellm/llms/azure/common_utils.py +++ b/litellm/llms/azure/common_utils.py @@ -61,7 +61,7 @@ def process_azure_headers(headers: Union[httpx.Headers, dict]) -> dict: return {**llm_response_headers, **openai_headers} -def get_azure_ad_token_from_entrata_id( +def get_azure_ad_token_from_entra_id( tenant_id: str, client_id: str, client_secret: str, @@ -81,7 +81,7 @@ def get_azure_ad_token_from_entrata_id( """ from azure.identity import ClientSecretCredential, get_bearer_token_provider - verbose_logger.debug("Getting Azure AD Token from Entrata ID") + verbose_logger.debug("Getting Azure AD Token from Entra ID") if tenant_id.startswith("os.environ/"): _tenant_id = get_secret_str(tenant_id) @@ -324,9 +324,9 @@ class BaseAzureLLM(BaseOpenAILLM): timeout = litellm_params.get("timeout") if not api_key and tenant_id and client_id and client_secret: verbose_logger.debug( - "Using Azure AD Token Provider from Entrata ID for Azure Auth" + "Using Azure AD Token Provider from Entra ID for Azure Auth" ) - azure_ad_token_provider = get_azure_ad_token_from_entrata_id( + azure_ad_token_provider = get_azure_ad_token_from_entra_id( tenant_id=tenant_id, client_id=client_id, client_secret=client_secret, diff --git a/tests/litellm/llms/azure/test_azure_common_utils.py b/tests/litellm/llms/azure/test_azure_common_utils.py index bbd2a282fc..42b5903ee8 100644 --- 
a/tests/litellm/llms/azure/test_azure_common_utils.py +++ b/tests/litellm/llms/azure/test_azure_common_utils.py @@ -19,8 +19,8 @@ from litellm.types.utils import CallTypes @pytest.fixture def setup_mocks(): with patch( - "litellm.llms.azure.common_utils.get_azure_ad_token_from_entrata_id" - ) as mock_entrata_token, patch( + "litellm.llms.azure.common_utils.get_azure_ad_token_from_entra_id" + ) as mock_entra_token, patch( "litellm.llms.azure.common_utils.get_azure_ad_token_from_username_password" ) as mock_username_password_token, patch( "litellm.llms.azure.common_utils.get_azure_ad_token_from_oidc" @@ -37,7 +37,7 @@ def setup_mocks(): mock_litellm.AZURE_DEFAULT_API_VERSION = "2023-05-15" mock_litellm.enable_azure_ad_token_refresh = False - mock_entrata_token.return_value = lambda: "mock-entrata-token" + mock_entra_token.return_value = lambda: "mock-entra-token" mock_username_password_token.return_value = ( lambda: "mock-username-password-token" ) @@ -49,7 +49,7 @@ def setup_mocks(): ) yield { - "entrata_token": mock_entrata_token, + "entra_token": mock_entra_token, "username_password_token": mock_username_password_token, "oidc_token": mock_oidc_token, "token_provider": mock_token_provider, @@ -92,8 +92,8 @@ def test_initialize_with_tenant_credentials_env_var(setup_mocks, monkeypatch): is_async=False, ) - # Verify that get_azure_ad_token_from_entrata_id was called - setup_mocks["entrata_token"].assert_called_once_with( + # Verify that get_azure_ad_token_from_entra_id was called + setup_mocks["entra_token"].assert_called_once_with( tenant_id="test-tenant-id", client_id="test-client-id", client_secret="test-client-secret", @@ -120,8 +120,8 @@ def test_initialize_with_tenant_credentials(setup_mocks): is_async=False, ) - # Verify that get_azure_ad_token_from_entrata_id was called - setup_mocks["entrata_token"].assert_called_once_with( + # Verify that get_azure_ad_token_from_entra_id was called + setup_mocks["entra_token"].assert_called_once_with( tenant_id="test-tenant-id", client_id="test-client-id", client_secret="test-client-secret", From e3729f9855749fb6e5ab97675681e70486ce45fb Mon Sep 17 00:00:00 2001 From: Adrian Lyjak Date: Tue, 15 Apr 2025 22:12:02 -0400 Subject: [PATCH 09/13] fix #9783: Retain schema field ordering for google gemini and vertex (#9828) --- litellm/llms/vertex_ai/common_utils.py | 38 ++++- .../vertex_and_google_ai_studio_gemini.py | 7 +- ...test_vertex_and_google_ai_studio_gemini.py | 159 ++++++++++++++++++ 3 files changed, 198 insertions(+), 6 deletions(-) diff --git a/litellm/llms/vertex_ai/common_utils.py b/litellm/llms/vertex_ai/common_utils.py index be8e4749dd..9f7b7f49a7 100644 --- a/litellm/llms/vertex_ai/common_utils.py +++ b/litellm/llms/vertex_ai/common_utils.py @@ -165,9 +165,18 @@ def _check_text_in_content(parts: List[PartType]) -> bool: return has_text_param -def _build_vertex_schema(parameters: dict): +def _build_vertex_schema(parameters: dict, add_property_ordering: bool = False): """ This is a modified version of https://github.com/google-gemini/generative-ai-python/blob/8f77cc6ac99937cd3a81299ecf79608b91b06bbb/google/generativeai/types/content_types.py#L419 + + Updates the input parameters, removing extraneous fields, adjusting types, unwinding $defs, and adding propertyOrdering if specified, returning the updated parameters. + + Parameters: + parameters: dict - the json schema to build from + add_property_ordering: bool - whether to add propertyOrdering to the schema. This is only applicable to schemas for structured outputs. 
See + set_schema_property_ordering for more details. + Returns: + parameters: dict - the input parameters, modified in place """ # Get valid fields from Schema TypedDict valid_schema_fields = set(get_type_hints(Schema).keys()) @@ -186,8 +195,31 @@ def _build_vertex_schema(parameters: dict): add_object_type(parameters) # Postprocessing # Filter out fields that don't exist in Schema - filtered_parameters = filter_schema_fields(parameters, valid_schema_fields) - return filtered_parameters + parameters = filter_schema_fields(parameters, valid_schema_fields) + + if add_property_ordering: + set_schema_property_ordering(parameters) + return parameters + + +def set_schema_property_ordering(schema: Dict[str, Any]) -> Dict[str, Any]: + """ + vertex ai and generativeai apis order output of fields alphabetically, unless you specify the order. + python dicts retain order, so we just use that. Note that this field only applies to structured outputs, and not tools. + Function tools are not afflicted by the same alphabetical ordering issue, (the order of keys returned seems to be arbitrary, up to the model) + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.cachedContents#Schema.FIELDS.property_ordering + """ + if "properties" in schema and isinstance(schema["properties"], dict): + # retain propertyOrdering as an escape hatch if user already specifies it + if "propertyOrdering" not in schema: + schema["propertyOrdering"] = [k for k, v in schema["properties"].items()] + for k, v in schema["properties"].items(): + set_schema_property_ordering(v) + if "items" in schema: + set_schema_property_ordering(schema["items"]) + return schema + + def filter_schema_fields( diff --git a/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py b/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py index c57090093b..3a58bb2c6d 100644 --- a/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py +++ b/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py @@ -207,7 +207,7 @@ class VertexGeminiConfig(VertexAIBaseConfig, BaseConfig): "extra_headers", "seed", "logprobs", - "top_logprobs", # Added this to list of supported openAI params + "top_logprobs", "modalities", ] @@ -313,9 +313,10 @@ class VertexGeminiConfig(VertexAIBaseConfig, BaseConfig): if isinstance(old_schema, list): for item in old_schema: if isinstance(item, dict): - item = _build_vertex_schema(parameters=item) + item = _build_vertex_schema(parameters=item, add_property_ordering=True) + elif isinstance(old_schema, dict): - old_schema = _build_vertex_schema(parameters=old_schema) + old_schema = _build_vertex_schema(parameters=old_schema, add_property_ordering=True) return old_schema def apply_response_schema_transformation(self, value: dict, optional_params: dict): diff --git a/tests/litellm/llms/vertex_ai/gemini/test_vertex_and_google_ai_studio_gemini.py b/tests/litellm/llms/vertex_ai/gemini/test_vertex_and_google_ai_studio_gemini.py index d2169c299e..41a37e2e57 100644 --- a/tests/litellm/llms/vertex_ai/gemini/test_vertex_and_google_ai_studio_gemini.py +++ b/tests/litellm/llms/vertex_ai/gemini/test_vertex_and_google_ai_studio_gemini.py @@ -9,6 +9,8 @@ from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import ( VertexGeminiConfig, ) from litellm.types.utils import ChoiceLogprobs +from pydantic import BaseModel +from typing import List, cast def test_top_logprobs(): @@ -62,3 +64,160 @@ def test_get_model_name_from_gemini_spec_model(): model = "gemini/ft-uuid-123" 
result = VertexGeminiConfig._get_model_name_from_gemini_spec_model(model) assert result == "ft-uuid-123" + + + +def test_vertex_ai_response_schema_dict(): + v = VertexGeminiConfig() + transformed_request = v.map_openai_params( + non_default_params={ + "messages": [{"role": "user", "content": "Hello, world!"}], + "response_format": { + "type": "json_schema", + "json_schema": { + "name": "math_reasoning", + "schema": { + "type": "object", + "properties": { + "steps": { + "type": "array", + "items": { + "type": "object", + "properties": { + "thought": {"type": "string"}, + "output": {"type": "string"}, + }, + "required": ["thought", "output"], + "additionalProperties": False, + }, + }, + "final_answer": {"type": "string"}, + }, + "required": ["steps", "final_answer"], + "additionalProperties": False, + }, + "strict": False, + }, + }, + }, + optional_params={}, + model="gemini-2.0-flash-lite", + drop_params=False, + ) + + schema = transformed_request["response_schema"] + # should add propertyOrdering + assert schema["propertyOrdering"] == ["steps", "final_answer"] + # should add propertyOrdering (recursively, including array items) + assert schema["properties"]["steps"]["items"]["propertyOrdering"] == [ + "thought", + "output", + ] + # should strip strict and additionalProperties + assert "strict" not in schema + assert "additionalProperties" not in schema + # validate the whole thing to catch regressions + assert transformed_request["response_schema"] == { + "type": "object", + "properties": { + "steps": { + "type": "array", + "items": { + "type": "object", + "properties": { + "thought": {"type": "string"}, + "output": {"type": "string"}, + }, + "required": ["thought", "output"], + "propertyOrdering": ["thought", "output"], + }, + }, + "final_answer": {"type": "string"}, + }, + "required": ["steps", "final_answer"], + "propertyOrdering": ["steps", "final_answer"], + } + + +class MathReasoning(BaseModel): + steps: List["Step"] + final_answer: str + + +class Step(BaseModel): + thought: str + output: str + + +def test_vertex_ai_response_schema_defs(): + v = VertexGeminiConfig() + + schema = cast(dict, v.get_json_schema_from_pydantic_object(MathReasoning)) + + # pydantic conversion by default adds $defs to the schema, make sure this is still the case, otherwise this test isn't really testing anything + assert "$defs" in schema["json_schema"]["schema"] + + transformed_request = v.map_openai_params( + non_default_params={ + "messages": [{"role": "user", "content": "Hello, world!"}], + "response_format": schema, + }, + optional_params={}, + model="gemini-2.0-flash-lite", + drop_params=False, + ) + + assert "$defs" not in transformed_request["response_schema"] + assert transformed_request["response_schema"] == { + "title": "MathReasoning", + "type": "object", + "properties": { + "steps": { + "title": "Steps", + "type": "array", + "items": { + "title": "Step", + "type": "object", + "properties": { + "thought": {"title": "Thought", "type": "string"}, + "output": {"title": "Output", "type": "string"}, + }, + "required": ["thought", "output"], + "propertyOrdering": ["thought", "output"], + }, + }, + "final_answer": {"title": "Final Answer", "type": "string"}, + }, + "required": ["steps", "final_answer"], + "propertyOrdering": ["steps", "final_answer"], + } + + +def test_vertex_ai_retain_property_ordering(): + v = VertexGeminiConfig() + transformed_request = v.map_openai_params( + non_default_params={ + "messages": [{"role": "user", "content": "Hello, world!"}], + "response_format": { + "type": 
"json_schema", + "json_schema": { + "name": "math_reasoning", + "schema": { + "type": "object", + "properties": { + "output": {"type": "string"}, + "thought": {"type": "string"}, + }, + "propertyOrdering": ["thought", "output"], + }, + }, + }, + }, + optional_params={}, + model="gemini-2.0-flash-lite", + drop_params=False, + ) + + schema = transformed_request["response_schema"] + # should leave existing value alone, despite dictionary ordering + assert schema["propertyOrdering"] == ["thought", "output"] From d3e7a137ad9ffa596c826da6f3ccfed83078f7b5 Mon Sep 17 00:00:00 2001 From: Krish Dholakia Date: Tue, 15 Apr 2025 19:21:33 -0700 Subject: [PATCH 10/13] =?UTF-8?q?Revert=20"fix=20#9783:=20Retain=20schema?= =?UTF-8?q?=20field=20ordering=20for=20google=20gemini=20and=20vertex=20?= =?UTF-8?q?=E2=80=A6"=20(#10038)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit e3729f9855749fb6e5ab97675681e70486ce45fb. --- litellm/llms/vertex_ai/common_utils.py | 38 +---- .../vertex_and_google_ai_studio_gemini.py | 7 +- ...test_vertex_and_google_ai_studio_gemini.py | 159 ------------------ 3 files changed, 6 insertions(+), 198 deletions(-) diff --git a/litellm/llms/vertex_ai/common_utils.py b/litellm/llms/vertex_ai/common_utils.py index 9f7b7f49a7..be8e4749dd 100644 --- a/litellm/llms/vertex_ai/common_utils.py +++ b/litellm/llms/vertex_ai/common_utils.py @@ -165,18 +165,9 @@ def _check_text_in_content(parts: List[PartType]) -> bool: return has_text_param -def _build_vertex_schema(parameters: dict, add_property_ordering: bool = False): +def _build_vertex_schema(parameters: dict): """ This is a modified version of https://github.com/google-gemini/generative-ai-python/blob/8f77cc6ac99937cd3a81299ecf79608b91b06bbb/google/generativeai/types/content_types.py#L419 - - Updates the input parameters, removing extraneous fields, adjusting types, unwinding $defs, and adding propertyOrdering if specified, returning the updated parameters. - - Parameters: - parameters: dict - the json schema to build from - add_property_ordering: bool - whether to add propertyOrdering to the schema. This is only applicable to schemas for structured outputs. See - set_schema_property_ordering for more details. - Returns: - parameters: dict - the input parameters, modified in place """ # Get valid fields from Schema TypedDict valid_schema_fields = set(get_type_hints(Schema).keys()) @@ -195,31 +186,8 @@ def _build_vertex_schema(parameters: dict, add_property_ordering: bool = False): add_object_type(parameters) # Postprocessing # Filter out fields that don't exist in Schema - parameters = filter_schema_fields(parameters, valid_schema_fields) - - if add_property_ordering: - set_schema_property_ordering(parameters) - return parameters - - -def set_schema_property_ordering(schema: Dict[str, Any]) -> Dict[str, Any]: - """ - vertex ai and generativeai apis order output of fields alphabetically, unless you specify the order. - python dicts retain order, so we just use that. Note that this field only applies to structured outputs, and not tools. 
- Function tools are not afflicted by the same alphabetical ordering issue, (the order of keys returned seems to be arbitrary, up to the model) - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.cachedContents#Schema.FIELDS.property_ordering - """ - if "properties" in schema and isinstance(schema["properties"], dict): - # retain propertyOrdering as an escape hatch if user already specifies it - if "propertyOrdering" not in schema: - schema["propertyOrdering"] = [k for k, v in schema["properties"].items()] - for k, v in schema["properties"].items(): - set_schema_property_ordering(v) - if "items" in schema: - set_schema_property_ordering(schema["items"]) - return schema - - + filtered_parameters = filter_schema_fields(parameters, valid_schema_fields) + return filtered_parameters def filter_schema_fields( diff --git a/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py b/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py index 3a58bb2c6d..c57090093b 100644 --- a/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py +++ b/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py @@ -207,7 +207,7 @@ class VertexGeminiConfig(VertexAIBaseConfig, BaseConfig): "extra_headers", "seed", "logprobs", - "top_logprobs", + "top_logprobs", # Added this to list of supported openAI params "modalities", ] @@ -313,10 +313,9 @@ class VertexGeminiConfig(VertexAIBaseConfig, BaseConfig): if isinstance(old_schema, list): for item in old_schema: if isinstance(item, dict): - item = _build_vertex_schema(parameters=item, add_property_ordering=True) - + item = _build_vertex_schema(parameters=item) elif isinstance(old_schema, dict): - old_schema = _build_vertex_schema(parameters=old_schema, add_property_ordering=True) + old_schema = _build_vertex_schema(parameters=old_schema) return old_schema def apply_response_schema_transformation(self, value: dict, optional_params: dict): diff --git a/tests/litellm/llms/vertex_ai/gemini/test_vertex_and_google_ai_studio_gemini.py b/tests/litellm/llms/vertex_ai/gemini/test_vertex_and_google_ai_studio_gemini.py index 41a37e2e57..d2169c299e 100644 --- a/tests/litellm/llms/vertex_ai/gemini/test_vertex_and_google_ai_studio_gemini.py +++ b/tests/litellm/llms/vertex_ai/gemini/test_vertex_and_google_ai_studio_gemini.py @@ -9,8 +9,6 @@ from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import ( VertexGeminiConfig, ) from litellm.types.utils import ChoiceLogprobs -from pydantic import BaseModel -from typing import List, cast def test_top_logprobs(): @@ -64,160 +62,3 @@ def test_get_model_name_from_gemini_spec_model(): model = "gemini/ft-uuid-123" result = VertexGeminiConfig._get_model_name_from_gemini_spec_model(model) assert result == "ft-uuid-123" - - - -def test_vertex_ai_response_schema_dict(): - v = VertexGeminiConfig() - transformed_request = v.map_openai_params( - non_default_params={ - "messages": [{"role": "user", "content": "Hello, world!"}], - "response_format": { - "type": "json_schema", - "json_schema": { - "name": "math_reasoning", - "schema": { - "type": "object", - "properties": { - "steps": { - "type": "array", - "items": { - "type": "object", - "properties": { - "thought": {"type": "string"}, - "output": {"type": "string"}, - }, - "required": ["thought", "output"], - "additionalProperties": False, - }, - }, - "final_answer": {"type": "string"}, - }, - "required": ["steps", "final_answer"], - "additionalProperties": False, - }, - "strict": False, - }, - }, - }, - 
optional_params={}, - model="gemini-2.0-flash-lite", - drop_params=False, - ) - - schema = transformed_request["response_schema"] - # should add propertyOrdering - assert schema["propertyOrdering"] == ["steps", "final_answer"] - # should add propertyOrdering (recursively, including array items) - assert schema["properties"]["steps"]["items"]["propertyOrdering"] == [ - "thought", - "output", - ] - # should strip strict and additionalProperties - assert "strict" not in schema - assert "additionalProperties" not in schema - # validate the whole thing to catch regressions - assert transformed_request["response_schema"] == { - "type": "object", - "properties": { - "steps": { - "type": "array", - "items": { - "type": "object", - "properties": { - "thought": {"type": "string"}, - "output": {"type": "string"}, - }, - "required": ["thought", "output"], - "propertyOrdering": ["thought", "output"], - }, - }, - "final_answer": {"type": "string"}, - }, - "required": ["steps", "final_answer"], - "propertyOrdering": ["steps", "final_answer"], - } - - -class MathReasoning(BaseModel): - steps: List["Step"] - final_answer: str - - -class Step(BaseModel): - thought: str - output: str - - -def test_vertex_ai_response_schema_defs(): - v = VertexGeminiConfig() - - schema = cast(dict, v.get_json_schema_from_pydantic_object(MathReasoning)) - - # pydantic conversion by default adds $defs to the schema, make sure this is still the case, otherwise this test isn't really testing anything - assert "$defs" in schema["json_schema"]["schema"] - - transformed_request = v.map_openai_params( - non_default_params={ - "messages": [{"role": "user", "content": "Hello, world!"}], - "response_format": schema, - }, - optional_params={}, - model="gemini-2.0-flash-lite", - drop_params=False, - ) - - assert "$defs" not in transformed_request["response_schema"] - assert transformed_request["response_schema"] == { - "title": "MathReasoning", - "type": "object", - "properties": { - "steps": { - "title": "Steps", - "type": "array", - "items": { - "title": "Step", - "type": "object", - "properties": { - "thought": {"title": "Thought", "type": "string"}, - "output": {"title": "Output", "type": "string"}, - }, - "required": ["thought", "output"], - "propertyOrdering": ["thought", "output"], - }, - }, - "final_answer": {"title": "Final Answer", "type": "string"}, - }, - "required": ["steps", "final_answer"], - "propertyOrdering": ["steps", "final_answer"], - } - - -def test_vertex_ai_retain_property_ordering(): - v = VertexGeminiConfig() - transformed_request = v.map_openai_params( - non_default_params={ - "messages": [{"role": "user", "content": "Hello, world!"}], - "response_format": { - "type": "json_schema", - "json_schema": { - "name": "math_reasoning", - "schema": { - "type": "object", - "properties": { - "output": {"type": "string"}, - "thought": {"type": "string"}, - }, - "propertyOrdering": ["thought", "output"], - }, - }, - }, - }, - optional_params={}, - model="gemini-2.0-flash-lite", - drop_params=False, - ) - - schema = transformed_request["response_schema"] - # should leave existing value alone, despite dictionary ordering - assert schema["propertyOrdering"] == ["thought", "output"] From 9b77559ccfca5efe3f401cf65469d57c1d057f4d Mon Sep 17 00:00:00 2001 From: Krish Dholakia Date: Tue, 15 Apr 2025 20:58:48 -0700 Subject: [PATCH 11/13] Add aggregate team based usage logging (#10039) * feat(schema.prisma): initial commit adding aggregate table for team spend allows team spend to be visible at 1m+ logs * 
feat(db_spend_update_writer.py): support logging aggregate team spend allows usage dashboard to work at 1m+ logs * feat(litellm-proxy-extras/): add new migration file * fix(db_spend_update_writer.py): fix return type * build: bump requirements * fix: fix ruff error --- ...itellm_proxy_extras-0.1.8-py3-none-any.whl | Bin 0 -> 14284 bytes .../dist/litellm_proxy_extras-0.1.8.tar.gz | Bin 0 -> 10060 bytes .../migration.sql | 36 +++ litellm-proxy-extras/pyproject.toml | 4 +- litellm/constants.py | 1 + litellm/proxy/_types.py | 11 +- litellm/proxy/db/db_spend_update_writer.py | 296 +++++++++++++++--- .../daily_spend_update_queue.py | 18 +- .../redis_update_buffer.py | 57 +++- litellm/proxy/schema.prisma | 25 ++ litellm/types/services.py | 2 +- poetry.lock | 42 +-- pyproject.toml | 2 +- requirements.txt | 2 +- schema.prisma | 25 ++ 15 files changed, 444 insertions(+), 77 deletions(-) create mode 100644 litellm-proxy-extras/dist/litellm_proxy_extras-0.1.8-py3-none-any.whl create mode 100644 litellm-proxy-extras/dist/litellm_proxy_extras-0.1.8.tar.gz create mode 100644 litellm-proxy-extras/litellm_proxy_extras/migrations/20250415191926_add_daily_team_table/migration.sql diff --git a/litellm-proxy-extras/dist/litellm_proxy_extras-0.1.8-py3-none-any.whl b/litellm-proxy-extras/dist/litellm_proxy_extras-0.1.8-py3-none-any.whl new file mode 100644 index 0000000000000000000000000000000000000000..b4a2ca73d266c8ae91d7749b3bb95ab2ecd83afe GIT binary patch literal 14284 zcmb_@1yr2NvM%oK9^BpC-Q5W~xJz(%cXtT{f;++8odgSR!8N!%_CD*J+|Awl-Q2g{ z%<7pnYfXPuU0vN(_4n6`GN52+KtMoHuZ`%n0Re&h`3VF>`1);a;S8|0w$ZnDv~zRU z2e>&q8agrP>s#1bIP2@v+q;7TDgLGZa%ExY^lQIx;Qw2HYdbSD3tO|-{%Q&`l|77b z-}lhek(F!nAl+t`3qpeox`E@w3gx&uUNpKO)1>$LMro}; zQafp@-T{LUn3%x5t!fQFwyl|Q01O01uNr1!c%epio>}*DrZGwdSIK;Nesza^K`=ul z3#Sk@@F}bE!j1q&9)AA*2&azgL2v2Woe9rhqKc4SOx7Cf)$h)J9 zEL7SB6hY(|*z;Q^5S44p>hP1RCGFkqx;+q~-)gM2PlwGy<3YLgA_dD} z88_fvZe^Y3mYhydE(+&`QiG64qX(8a7quw0RSNAhN6679ZOcLJV^CR63S6YZPA9_c z=rA1;X&pyVLQ%rx*`GTolEAajKX6*rcUpa)z2_UfhIdEjsb@(U5&G~*=&znrZ?Yhh zQ6CmN)CN)T1P`?J%&^uCQM`t}8yIq_%$^(&y5{5btTi8a_PD2I=s^*`A5HRB64&Kl zfYH?A)o`L&21AZ$^~dZs2M^!;eB<{cTf&}_9AIIBCqw(ApZ|KhE*CjAbd+b(zS2}W2w+u&5_)-22@ zAQ*Sk8FM@+lq!11McXsbNYZz=u2Yu|vgs)<6&IiZlHur{B7HI8BWLA>QaCwktZ-11 z0z^Vq+n6p09ia}16p=3`LL-_@2-Fp^M(d3p_+y_q;m$SwNw(9dh z9i#F|QKBFqSuiK~q*nCPf~EY_ZMLAII~#bWs=?Kcl^8hC2-BHYX_o6iAA! zyI0f#WVJtWoj5f+g<&#OQX(Z&9)cDXTz!wLq&Oj1q7b9K|z)2!c$!yE9Cu}P2AqHjQAPj^oXn0f}U%{PZ{O=PI_15r-}s{fydT`C-L&Z8J1>| zWsAoX-1}a+F>0+p1ypIYn?x=M5~R&=%BT}2KAS5JCC?idnduhFZk>khqS~OXIcZP@ zieP^vIAbZAwI$TpE_1S4j8$w3b-|sE=v`Zf#qG~mfb?^dd$(>m@EszIIqsBWsV?*A z89$7J*`2IGNA6&sfWWUR#62T-XKs(K?HOjl9~4U@NJI?_|NSLwY$QBy#}R zs~oN=OYTKe`{t`9};<+WqvXq}wR7GwQgvm>PY!btX^OwXvUnR7Rlt z;i}N~S7&DN@8H21dZ7v`3iE7qnJ+_578ApT>ieybu?MU*nPT3%SY@nY?4RoAyS9YhYNI~49959LZX=ZE~1Slu+y_kkzmu?R6eC9NyK$x+#L$Dgk^_yCsLkjgdM*}=w%egwD zU$clzFd!hM|4r!RU}9us);BaU(Kj))uy)sXaRNB%I~y8VzX_w+Dmqqyj9A_`8eqA! z3rq`JIa#J8w3e78^$Oi|hLY`I4TVoPkpr9q^@%XRceZ=2S#>KQoyb}g=GX6`Mk%HW zkrYp%MGUsuP_aU3hb}e~8VjU~J~hMR!G4;Y_{tFM2~(iij%KW}tU#SW$JNr4abv=4 zCgG6on!<+JkB<3ZeULZ@T==v#_rYa2qQIoKw9}D3L*EBL3mE)zPg(X2g0kQYJfV~u zYP~Mi2b` z1@Eee*7ao6-NDiyT?aphkpi`Ap7K3{>B0^?%J85 zGBPnU{yC$D_7?h%00$RUH$ZEQe$w;Q`$}_2k)1+6`Up~-GyyQN zc4}*(jjObkLXVF!J=9R~*c1AcwFX&HcOZWqr>+`v#QN*_l3u-6;rGYM#mLFZ$;hT} z3vkspcC@qAx3n|*JHK^_6S4|oL<&B=Ll>c^{!u!ZGvkb5omk#iGnZ&SZm82bf`j;U zGv0VYOv{6mX1My`mQD-m?pWdKrrVKTPYLCJ0OH>nrMIusIff?+ZB7mEKuWq}hxHL@ z$MiT+VxSf_WXaZEBgRb`xCmjD>Lac@PAu>UkttGyZ_u67QHIE*|56e?? 
zg|(G&t6H0pNU$6S3Tm;;sLxjBW^1%6?*ejKS_Tq==r=>RztPfIO)EO7RluEw@_b8r zqS{_fG7+==v3odvwd2Ey#y;Qk?JR<=Z<&Sk$ZJghug0_+8^U*gHDc{+`@?bmwjlXW zdi?JUnuUp(iH((wUH`RSF}BioaxpdrI63JX+M4K_zHSNtlV4u+uT8?x%bP3Z1ys+8~edH#MDsF*lCR01Wi?-?RW;=;g! zN_g(tR&$1@Z2Ljw+b{?>WJ!z>?s{Z42ZKuPj+$7cb~D6v>M<&J2vT_09gDcp!s(b@ zxeK0{OXYr5cXt}%UyYEVj>~-eI(UcI(oE%djljzErx93~{#-dWhPH-guj4j-9sS>p zG&`Qj?scsMyS_vumWUFoNRXpOZEteUwMa5*LEvt)2Yr$wyFbl@2==Fr*OqzjmAmHY zkxClZ;koQl|1grM>J)+(M@I@tEpqsAOxD!O^THOr3|E|pcnTi}4}4n(EZMaw$?=2d zOL;Gon_0{XA`8UN-qGi;@3dhi#VRpk`|dWq027TC!ru2-{s`V)X4WME#9QFC)~I~_ zt_Ue!{nFuaDcVlKyYlcBxEvLXBz$SRWrE{DJ?ECPm%E*m*7ZYatKAu=g!iRXLmPb?fQ`{>#IXlB+E_R_{aM!i$3Ool-^hlA{@9hoH@xM~ z6TKM}HH=X;RDv17KPFA}(1JuuCp;8>ZSvBe_rER{`(~pdLd!4sV~ik0MP-*MqVjI= zbwdilRoFt8X!NEs9nVa;{g$9ovH!CiWAl`m!Twno6_ygrV@zi4hWHrP7O==LWFs$9ZH9 zOg}$}6|M}_*P(+r+O7=`-(>HgETG4NQB3QMu;GeC3JR7>K!U7Dfw|(PRu+=-Ark*O4)y!0s1Q=3ODe`MP@)d1 z^I*s`1s}o=6L%U-`n|#@Du44Gd{sYf5P6Kd-f8AlMtQrJ(nRHO7*7f#4m6kOc8{Ot z$w0II<9M}5RKCOyuCl7Bq$5Xgl|vAlHCU)d6A2Z#BEVoGym)1>>pJ*}-*l}*J{zo$t2je6933>E_j;FX9u8ViV-ycV7Kr0eHF zrHgEFXud0)34DVa&0c&pNVAI1@XS_UOgp2-PQMx>L=NETzdHu+c#}ijz(Y6Syt*;g zYh(Ehcm9u`>Ra0xThTk)*;vz1jxdbMO3^7-t0__sG0;mfNYl}eDQ$wh5mmWo;Mo8N z1a!#>1VsE#qMVG)0XBy8_Kp@#Hij!YD|X-6QG6$!(fd03x)1hDt#C$rx=h{) z9)Yxvfx$j)_ibJ7NT9P}gZ>NtiO~spt>rG#S3w0*OEgfx)Ne-)1 zMUTF9ClXalZL%n^Jdf5?|7hFTFe3nv`C)M=JGF+VPCQM!Cd53I#v<)Li0X^fj}O z*6LHmqjD^kqCL{zm`7DHz8;jQTck~D31!PAOJGmLIiK^cWL$d`WbYDi)%wl>Y7R=W z>+@>sX~(uT3Qj@3_Pzi}-ur3O&+2YeH>;n~berBT5N!B<`HH!NPy3#C6yQ>Q6iT1V znf2{PoZZ$j`4Q&CbWX|`?5D0|+|xcxfK9U4z5e?zD~t2;B*&*;A%S<}1Iz0JRzifd z`wYS4SNWfCS8#WvPCeIJnNILZMmmzLBCoa9x5&d1fh$M)Lx||=0Zt}Y z@X4Y)J48_-HbJuQW}mw7J3p&|oMes2-tc3)Mz=oAMu5#NAX3cSd_T4tB#iRSYg>b3K1c8Vnc|=J{zyiT4y-DvgUjyQ#(3z3KGnlmWbum zST0@gw;G{M*1nR#g`~9{QzJ3-P>zep?x*v81`Jy}v4cOJb$v#htEBOJrw-lEsKB9* z@4FKfgQuN&572q z^hq1N=7L9)yP)-&aRmYo5w-qLQp%ErI#9Jnd5UMPE}i$?5RyZcGmB`4r>QVjI9 zaV39qId-r*du(4lW(HC>NVUsV#gx50IEk2pMEVzS#LMc*NV1&JTVz0zkZfPFe+;St2>oeL#HKYl$zlB;e)4&hw- zN9^kJETbTefjSp#Ez^_^0i-bzRkDT-n!~=mp^e3EznYae!Jsmq?Lvu8aQCqM5kLm~ zJV&_HgV07UWTOGj4nB||?UogKKVaGq#Fb}H1SBXnf-j|l00c`QaD|o~#p;adFs&0_ zAtX1nCpA2y=Oignyrkb|>Fj{G@SE+r*1T&D28U0~J^n0zCkM~^1(Wpm$Vr}J_)B1o zzQ*=jpku8Wz|hNfZ81txq!Y>`xjn|akH<-Hff6&WgYyo(aJ0gegA(@d2&xxNO!;ZA zccjl6m}N#wX|qOUzxv??gZvc!(xMKc)q}Wwk4;&^uSO*< zKd)gbG*SF$1hE4o#lEjTHWO7K7klMz%Hf*Um*sAKJninG9LRezA9z7|X0W9XR2lj; z_UnY-a~F^RH)Lf8;Yn*2B#3WSPv5)r`WP5%Tlnm=>B$Q?M(v|RS={O<`5XycY!Yl_ z+G`O?t4r(rhNUn3B1Cs37(j6Q=IjMdUV>ug)to691ugPyBT>jd!0SMu#R+BXwYJRD zlNk+pAtDF!OL_UlW$C4f?1+>|+qa9!jY;vefjNmXK#=hmdM2(O7QR+qQV+^=t2*o~ z3ErQ#kJW9Ug9P&FVH#~Lc0v+Hu799@$;4&FIOA7yy&e*lMGP*VfdY94(y=n)-v(PG zz|fQ-ii3m79E4gGb%fPOXL&+%sj`N3$-KfNH zAH|Xbax^TO@^UZFGAr*QRjOq)nO2K>1_X+EH<>;$SzUru7&D)r;TJy0dPu6Uqo@z< zN8cHMPnL*DFATR?dd)Bb;DXYP%4_Bfr6uJKOclL;93boBS~<;8AdADZ2OQQRw8o?9X=7%hErLm}f*;Xn-1J!DYk>MFrv0 z3mqDiY{i(YID9PIB+&i*Ascvrt4UNr58=~Cj3A-wfO)a&1mBEYA+n%8Qr5ZV864)` zl~HyFA3})tOnue^P+d$aeKT%Q&8E-JGog zxtRSzf9UO}E1TT?;n>&otgTVtx_U zD|rea7zk@?mTGrmVv`-FU`)iLnro!yfOY_Ki*|HfzoaZXv+4fjt9^j<++)uXDup1S zLthJ}KE$F9>Qt&03MnwtJ!QS*H^ICG7-$j9qnk1PET@Y&gN^JN$aZJl!OSlCy!jOi zBGwezO0;+{}xQ#KG0fY{gX% z3xz-4)6lNByl&$kmj`_ct)wr?iy`o-e1isK2<-zTa>bzH3|zDHXwxiM*cH_tyeZ|>Nef$G-!I1(`|kODD&yNK03 zvXBgS*MNmrND?Kb#^T9jBh}_eLce*Ujn1kj>#`{;>`9C;gykYrV>@fQ*iISgLUkw_ zxvYx9Ct*xYlW5oMu9|3y?F3QWhc_|gC4Z_ni!WFYO&j^OPqhdKuN?{v%w5%jM^}5$ zSc3VasPVxVq%i+In5Cb3Nc(!;UIFicANhS!FG`%VPS;pPV5bWnFUx=^k0d$Em-8Ph z{Py!3oSOttNOA0Kr_eHob{=^C?e_vIyJxoMM!_o;ajiqPJjH;`S-dqhg(Xs(^=3nT zE_eZ+Co7;EgiS;ug7FJ&3?26L8Xy4|)2>gzwm}`CB>D)uV8ZL`V0m%4hY2;%rF>gM 
z1U05upwO!%8&=haT8)onaVwzSzxpNz=rbwvZXL69YAKd+@74Z+G- z1_cE|Cti0SADnh>cXtmgLU&h(`wx)AnN3wMPq{yK4lzsi=*h%ee@dV$WKp?WPJye4 zs4-S((n#A)X{tKX(jyO?Yu#ojK_*+MFx?C5q%JvC5z^|=DAq88D?DG7*r1H+i95oV zzzlYcvFv+`55sTU4Uo3E5{AnHXJD68*^Qu=^L%s0<4`b$@u*-wOTgqr@{X@Nht^-@ z!HOO<=*K7G9kQ2@YUDYW9XHj;x7OIOl8C=zDJ$w_-rrMoRsvsP8+b&!mpoc=3z6ig zt~1ah*eVT%pD}qHGtdLV#=oFn)(fhm6-TW**1Myumh*v!3B9AU?b^qI*YAusNh6$Y zi9+q|?%l)@JTz_zL6z4*Gr%9*bOEOCXY;G|n=3DJgm#`)%tOxuqN?U*y{Cz+y+yvi zi=`O>F{c=%t`z7$Qg9B~a0OPy zQ%n)5hU3#0ps|Ml+%Fc{?c8@K7e`N2az_O!*vvJSfviwzYn%E{f}6c-RLF5(_Jv>S zm57Y3Lrb^SILz>@qijg4X2oZlf2JUPCzdIZ`3BBeP^sye+pQR^`4)_&Gc z@9l-+Mge*Dd^kR-J(4Ea zJv#yiaMXxJJ<}s8%0MT?6&F`;X*+p_nD3Up8lgfcM0!VMUg#{(ed9x-AkR+toHqs; zd3U!*Z58_U+{@eJ-8dV?caT6pRk%Pv`2SS&xHwx_JG~xtk86$FerHGOI8{L!0W_^7 z<2Ubj`vr1`qoRG%ng1{qhzzA0-b#~3Q$`B#G`wAC`Z4=cid*C$bS`(^0%1q!{ka+E zCH&oZRFh%%S7g0wwYgT2m-HvRVFiR~&P|}gm6?{bU z@{n-vj$}P$ zn$EDO=26qZA)`70j|c>h4iQ4~GV(hQ!FCSE*u7!blZn8f0Xftx5B}ag=b^uy}JxG|JvB^3=1i{Hzjd))jXg8@{NCx94Fk9T3o?%)_&tXx; zO(+zo;a3%kERRV3^FX7BwJzPwfPyXO&>G#=PH81eI>o}p5#G$swkE8v!aci%$IA#i zag|{E;6@+HquG-V8MMk-%R7JEin)++zYUG zYGC9oD}4u%NjTsi$TN*?eet8Z$i+w$ITQF!$dioo^(4Lq-j$Njd~8723kWE-gI@ts zp7rjetK<|S7-f)E2RiOt$xmdMs$rkR_;dzYl_9 z>yM@U5vDc165RRjvY0nHx5~?@5X(-o(Z|K)rlCRjJwJyU08)tVIFZr`kLT6CUQq_) zoZ}njwNNJ8gr;r^oO~ra6ToVOc2ESTg}E5@YXe&`Vbi;I z<+p>KHfvq6;$+Q_07OLyvLltMgXP_VN^E)XFfHN%!4mtrp$AJTB=D)r{NU}(X2z;{ zemWPQm_MqBjY;K~QG-hJms6}m;QE|0FdnRF2+oL|gtiE8j2px?3&z!jw$w)uI7N(F$Y^ z?)nK8oAc1V823i;5dw!Sn|@56v5yPW{QBW53Ilvq6B=t+h8I|DfdgU-MFU2ePR%yM zT}0jAGyLT-av@XZCc;_adsxR4lyu&ho-!f7lTd) zE3xrj`N+Jy<|2Tsji2D2 zo|?D+o~XE^ebF~~IYX-^*%gEY z1p@!(HIrSf2$su`E09NJAe_f?g?xAq;yiE`X@>Hx-Pl#Rrc+oY zhXyPABU@Jn?fHse#o3CsYWG#=-NplJtlz6AiOij?JjT+VW9Z{+Bw?8{3ukW2X9z&4 zvVKf|pLm|O?}E#a2!gE>Li`q=y)MfqXIdsk_BD6J`D^9uyCQ27_vAp4FO*9bSzojG z`1zU3B#U7NjCiuL+?OG5RRy}{8T325g?Y^UWCk3M5e7U-$r{@~c(?{|XygZy2YlOC z@@cR7o*6@^`RN`NZpq!cx>YfK^~z3h!!!i-!8ez$VtRQ=)gN#9UhaN7W zp})oWnlXX(%K?^rnqw+jTW0lOybm54{pYGyV-Xzs{RzCWbpIuZy&R$+(}kcj*xlXi z#UEch>e=vIv(F!(G=!&Gjl1$n);W8^ym&Vv+D^Dbptc5(rX8NrB|#7t%T|Z4 z#f9;^cLB$~)3!b=Wv$v>Pco2}U}bHtBUK9H7I=pl za@&w&wv9{|O#kc!dM*0d6}r!mGq(S2GK~^i92Gf-J2Cix2@b{bN7-CMe{YMBd7ASO zi1iT3?{^*^I;Vx=_tF@DqTc-Wl87k8!#6YBXz=rPbh{e>6K`; zVbPMKT(w4UeF|QJHM>&6s!?`I?ZnzY);aWX5XZxL+2-PE*-HtjHDF>(;W_gs8wDhH)54P z8WKl3lIaf;E(~dnHh^w#3T_7JQ9c8!fw3fyOx@&qfc)#pR!ngv+QRGE&*dwfg6?-0 zl$@BVkf@NVPKLn+(SvLmD1%t zHpOB}9>&IM4)5vsifKYLpp6yMhQ@e75JV;P?miHEBA5u^>UvT;`P(}TV0DX@WJNKk z-4Pesb3==@u+a|KZ6bTy7oap}#fb>sj;kvEl&d+kx94Cv*<*LezD!}@xnl9+OtD7D z{iBrc#m>254tmuq&feRS&RBy~Wh+V_;5g0}sxfNu@XB0`RdMAZSN< zNIEq5Ji(Rm73Jf!4Ps8%jRl#-VlU;1-80??`?PyZ=&tj!Ot48Z8xbLa=k1$)aq;0I#_d`9{6#lOJZE~}*4KED^jdAv{H|`wVj>F4qRC0a31yOKAvZT# zQ!?b21(-vGl*E*3d6{sgLgZ3N+xyFvF=aZ)0JY`Ts3Fvs*R6i&OEabPT>Vcsk@JNH z{^w{#bpXEb<*xX8A|m!RLq;Nc>#(~`&5ha6w2bROc~NuI)=G=Xm|UG>tjt0E=~v8v zM>Sqmx&zknva3YurplFOmqwTFA1UrEh3jr?_ukVB{h@8BrR>#48_fjIeBsu%xi=tD>9J^&hkitM0 zbymwx{eGk$emJEB%cFeuU2yUvCAuht67y02t{A(8;o_s{6^Mdw8c&7I_CaQnmbn=z zBvK+bMoz>~nVr&Gv3$aTRB##$DC>+C8qy6#@FKd;j(Kh%m45(VTKOaL-oT6zORtj8% z_1is4i!*U4|0enrpPdrd>g|{t)RFv9tB5-AnK%U!b*sJ^O%ghWrDe2ljN@3T7HvHU zLJ0;$m$WdX?p;i12S%Nu09}ZieT&V;F-9L)&mqT;l@~*3z^xob!-+<)e=4Zp$1;C(Sgo4+z8SP3>ib?PcpRYg6Rd``n^5P?gn;M(b;<0J3 z9EsJzW>oYZIalC4FW$b8-?^>Y>A2;%y4-Di1gFkiX7D-2`w+Pg1(z((9Klmbq*J$P zYGyxgeM(oqL5gyg>EFLWMgQbJYubFZ7Tk}X^HpBxvTZ_976=Vp#)31vG!BV#`gO=RS1{|X3)=v`#rBzUTX9lJ<$gI5+8xq`X^pn_a%VP3 zp)!YM!U}fR4`*0Uk;CE+qFT;xp3w`gzgD*Ouz2&l+k@E_5B z-CTH!_Ll1Z3r*oQ?EbB8|2b~|H*M*EApXA<`IaL2OJv0BZI0g-`MZS4x8mRO`F@Gl 
[GIT binary patch data omitted]
literal 0
HcmV?d00001

diff --git a/litellm-proxy-extras/dist/litellm_proxy_extras-0.1.8.tar.gz b/litellm-proxy-extras/dist/litellm_proxy_extras-0.1.8.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..a254112d2b4eb075fdc479be23f7e4b27c54277b
GIT binary patch
literal 10060
[GIT binary patch data omitted]

literal 0
HcmV?d00001

diff --git a/litellm-proxy-extras/litellm_proxy_extras/migrations/20250415191926_add_daily_team_table/migration.sql b/litellm-proxy-extras/litellm_proxy_extras/migrations/20250415191926_add_daily_team_table/migration.sql
new file mode 100644
index 0000000000..a6eb461bc2
--- /dev/null
+++ b/litellm-proxy-extras/litellm_proxy_extras/migrations/20250415191926_add_daily_team_table/migration.sql
@@ -0,0 +1,36 @@
+-- CreateTable
+CREATE TABLE "LiteLLM_DailyTeamSpend" (
+    "id" TEXT NOT NULL,
+    "team_id" TEXT NOT NULL,
+    "date" TEXT NOT NULL,
+    "api_key" TEXT NOT NULL,
+    "model" TEXT NOT NULL,
+    "model_group" TEXT,
+    "custom_llm_provider" TEXT,
+    "prompt_tokens" INTEGER NOT NULL DEFAULT 0,
+    "completion_tokens" INTEGER NOT NULL DEFAULT 0,
+    "spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
+    "api_requests" INTEGER NOT NULL DEFAULT 0,
+    "successful_requests" INTEGER NOT NULL DEFAULT 0,
+    "failed_requests" INTEGER NOT NULL DEFAULT 0,
+    "created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
+    "updated_at" TIMESTAMP(3) NOT NULL,
+
+    CONSTRAINT "LiteLLM_DailyTeamSpend_pkey" PRIMARY KEY ("id")
+);
+
+-- CreateIndex
+CREATE INDEX "LiteLLM_DailyTeamSpend_date_idx" ON "LiteLLM_DailyTeamSpend"("date");
+
+-- CreateIndex
+CREATE INDEX "LiteLLM_DailyTeamSpend_team_id_idx" ON "LiteLLM_DailyTeamSpend"("team_id");
+
+-- CreateIndex
+CREATE INDEX "LiteLLM_DailyTeamSpend_api_key_idx" ON "LiteLLM_DailyTeamSpend"("api_key");
+
+-- CreateIndex
+CREATE INDEX "LiteLLM_DailyTeamSpend_model_idx" ON "LiteLLM_DailyTeamSpend"("model");
+
+-- CreateIndex
+CREATE UNIQUE INDEX "LiteLLM_DailyTeamSpend_team_id_date_api_key_model_custom_ll_key" ON "LiteLLM_DailyTeamSpend"("team_id", "date", "api_key", "model", "custom_llm_provider");
+
diff --git a/litellm-proxy-extras/pyproject.toml b/litellm-proxy-extras/pyproject.toml
index 8fd40a78ab..58577b8292 100644
--- a/litellm-proxy-extras/pyproject.toml
+++ b/litellm-proxy-extras/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm-proxy-extras"
-version = "0.1.7"
+version = "0.1.8"
 description = "Additional files for the LiteLLM Proxy. Reduces the size of the main litellm package."
authors = ["BerriAI"] readme = "README.md" @@ -22,7 +22,7 @@ requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" [tool.commitizen] -version = "0.1.7" +version = "0.1.8" version_files = [ "pyproject.toml:version", "../requirements.txt:litellm-proxy-extras==", diff --git a/litellm/constants.py b/litellm/constants.py index 12bfd17815..0f35520b2e 100644 --- a/litellm/constants.py +++ b/litellm/constants.py @@ -24,6 +24,7 @@ SINGLE_DEPLOYMENT_TRAFFIC_FAILURE_THRESHOLD = 1000 # Minimum number of requests ########### v2 Architecture constants for managing writing updates to the database ########### REDIS_UPDATE_BUFFER_KEY = "litellm_spend_update_buffer" REDIS_DAILY_SPEND_UPDATE_BUFFER_KEY = "litellm_daily_spend_update_buffer" +REDIS_DAILY_TEAM_SPEND_UPDATE_BUFFER_KEY = "litellm_daily_team_spend_update_buffer" MAX_REDIS_BUFFER_DEQUEUE_COUNT = 100 MAX_SIZE_IN_MEMORY_QUEUE = 10000 MAX_IN_MEMORY_QUEUE_FLUSH_COUNT = 1000 diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py index e0bdfdb649..c68f585405 100644 --- a/litellm/proxy/_types.py +++ b/litellm/proxy/_types.py @@ -2769,8 +2769,7 @@ class DefaultInternalUserParams(LiteLLMPydanticObjectBase): ) -class DailyUserSpendTransaction(TypedDict): - user_id: str +class BaseDailySpendTransaction(TypedDict): date: str api_key: str model: str @@ -2784,6 +2783,14 @@ class DailyUserSpendTransaction(TypedDict): failed_requests: int +class DailyTeamSpendTransaction(BaseDailySpendTransaction): + team_id: str + + +class DailyUserSpendTransaction(BaseDailySpendTransaction): + user_id: str + + class DBSpendUpdateTransactions(TypedDict): """ Internal Data Structure for buffering spend updates in Redis or in memory before committing them to the database diff --git a/litellm/proxy/db/db_spend_update_writer.py b/litellm/proxy/db/db_spend_update_writer.py index 6d88b3fc46..4247553c30 100644 --- a/litellm/proxy/db/db_spend_update_writer.py +++ b/litellm/proxy/db/db_spend_update_writer.py @@ -10,7 +10,7 @@ import os import time import traceback from datetime import datetime, timedelta -from typing import TYPE_CHECKING, Any, Dict, Optional, Union +from typing import TYPE_CHECKING, Any, Dict, Literal, Optional, Union, cast import litellm from litellm._logging import verbose_proxy_logger @@ -18,6 +18,8 @@ from litellm.caching import DualCache, RedisCache from litellm.constants import DB_SPEND_UPDATE_JOB_NAME from litellm.proxy._types import ( DB_CONNECTION_ERROR_TYPES, + BaseDailySpendTransaction, + DailyTeamSpendTransaction, DailyUserSpendTransaction, DBSpendUpdateTransactions, Litellm_EntityType, @@ -56,6 +58,7 @@ class DBSpendUpdateWriter: self.pod_lock_manager = PodLockManager() self.spend_update_queue = SpendUpdateQueue() self.daily_spend_update_queue = DailySpendUpdateQueue() + self.daily_team_spend_update_queue = DailySpendUpdateQueue() async def update_database( # LiteLLM management object fields @@ -158,6 +161,13 @@ class DBSpendUpdateWriter: ) ) + asyncio.create_task( + self.add_spend_log_transaction_to_daily_team_transaction( + payload=payload, + prisma_client=prisma_client, + ) + ) + verbose_proxy_logger.debug("Runs spend update on all tables") except Exception: verbose_proxy_logger.debug( @@ -381,6 +391,7 @@ class DBSpendUpdateWriter: await self.redis_update_buffer.store_in_memory_spend_updates_in_redis( spend_update_queue=self.spend_update_queue, daily_spend_update_queue=self.daily_spend_update_queue, + daily_team_spend_update_queue=self.daily_team_spend_update_queue, ) # Only commit from redis to db if this pod is the leader @@ -411,6 
+422,16 @@ class DBSpendUpdateWriter: proxy_logging_obj=proxy_logging_obj, daily_spend_transactions=daily_spend_update_transactions, ) + daily_team_spend_update_transactions = ( + await self.redis_update_buffer.get_all_daily_team_spend_update_transactions_from_redis_buffer() + ) + if daily_team_spend_update_transactions is not None: + await DBSpendUpdateWriter.update_daily_team_spend( + n_retry_times=n_retry_times, + prisma_client=prisma_client, + proxy_logging_obj=proxy_logging_obj, + daily_spend_transactions=daily_team_spend_update_transactions, + ) except Exception as e: verbose_proxy_logger.error(f"Error committing spend updates: {e}") finally: @@ -446,8 +467,9 @@ class DBSpendUpdateWriter: ################## Daily Spend Update Transactions ################## # Aggregate all in memory daily spend transactions and commit to db - daily_spend_update_transactions = ( - await self.daily_spend_update_queue.flush_and_get_aggregated_daily_spend_update_transactions() + daily_spend_update_transactions = cast( + Dict[str, DailyUserSpendTransaction], + await self.daily_spend_update_queue.flush_and_get_aggregated_daily_spend_update_transactions(), ) await DBSpendUpdateWriter.update_daily_user_spend( @@ -457,6 +479,20 @@ class DBSpendUpdateWriter: daily_spend_transactions=daily_spend_update_transactions, ) + ################## Daily Team Spend Update Transactions ################## + # Aggregate all in memory daily team spend transactions and commit to db + daily_team_spend_update_transactions = cast( + Dict[str, DailyTeamSpendTransaction], + await self.daily_team_spend_update_queue.flush_and_get_aggregated_daily_spend_update_transactions(), + ) + + await DBSpendUpdateWriter.update_daily_team_spend( + n_retry_times=n_retry_times, + prisma_client=prisma_client, + proxy_logging_obj=proxy_logging_obj, + daily_spend_transactions=daily_team_spend_update_transactions, + ) + async def _commit_spend_updates_to_db( # noqa: PLR0915 self, prisma_client: PrismaClient, @@ -835,6 +871,187 @@ class DBSpendUpdateWriter: e=e, start_time=start_time, proxy_logging_obj=proxy_logging_obj ) + @staticmethod + async def update_daily_team_spend( + n_retry_times: int, + prisma_client: PrismaClient, + proxy_logging_obj: ProxyLogging, + daily_spend_transactions: Dict[str, DailyTeamSpendTransaction], + ): + """ + Batch job to update LiteLLM_DailyTeamSpend table using in-memory daily_spend_transactions + """ + from litellm.proxy.utils import _raise_failed_update_spend_exception + + ### UPDATE DAILY USER SPEND ### + verbose_proxy_logger.debug( + "Daily Team Spend transactions: {}".format(len(daily_spend_transactions)) + ) + BATCH_SIZE = ( + 100 # Number of aggregated records to update in each database operation + ) + start_time = time.time() + + try: + for i in range(n_retry_times + 1): + try: + # Get transactions to process + transactions_to_process = dict( + list(daily_spend_transactions.items())[:BATCH_SIZE] + ) + + if len(transactions_to_process) == 0: + verbose_proxy_logger.debug( + "No new transactions to process for daily spend update" + ) + break + + # Update DailyUserSpend table in batches + async with prisma_client.db.batch_() as batcher: + for _, transaction in transactions_to_process.items(): + team_id = transaction.get("team_id") + if not team_id: # Skip if no team_id + continue + + batcher.litellm_dailyteamspend.upsert( + where={ + "team_id_date_api_key_model_custom_llm_provider": { + "team_id": team_id, + "date": transaction["date"], + "api_key": transaction["api_key"], + "model": transaction["model"], + 
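+                                    # composite key mirrors the @@unique([team_id, date, api_key, model, custom_llm_provider]) index on LiteLLM_DailyTeamSpend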
"custom_llm_provider": transaction.get( + "custom_llm_provider" + ), + } + }, + data={ + "create": { + "team_id": team_id, + "date": transaction["date"], + "api_key": transaction["api_key"], + "model": transaction["model"], + "model_group": transaction.get("model_group"), + "custom_llm_provider": transaction.get( + "custom_llm_provider" + ), + "prompt_tokens": transaction["prompt_tokens"], + "completion_tokens": transaction[ + "completion_tokens" + ], + "spend": transaction["spend"], + "api_requests": transaction["api_requests"], + "successful_requests": transaction[ + "successful_requests" + ], + "failed_requests": transaction[ + "failed_requests" + ], + }, + "update": { + "prompt_tokens": { + "increment": transaction["prompt_tokens"] + }, + "completion_tokens": { + "increment": transaction[ + "completion_tokens" + ] + }, + "spend": {"increment": transaction["spend"]}, + "api_requests": { + "increment": transaction["api_requests"] + }, + "successful_requests": { + "increment": transaction[ + "successful_requests" + ] + }, + "failed_requests": { + "increment": transaction["failed_requests"] + }, + }, + }, + ) + + verbose_proxy_logger.info( + f"Processed {len(transactions_to_process)} daily team transactions in {time.time() - start_time:.2f}s" + ) + + # Remove processed transactions + for key in transactions_to_process.keys(): + daily_spend_transactions.pop(key, None) + + verbose_proxy_logger.debug( + f"Processed {len(transactions_to_process)} daily spend transactions in {time.time() - start_time:.2f}s" + ) + break + + except DB_CONNECTION_ERROR_TYPES as e: + if i >= n_retry_times: + _raise_failed_update_spend_exception( + e=e, + start_time=start_time, + proxy_logging_obj=proxy_logging_obj, + ) + await asyncio.sleep(2**i) # Exponential backoff + + except Exception as e: + # Remove processed transactions even if there was an error + if "transactions_to_process" in locals(): + for key in transactions_to_process.keys(): # type: ignore + daily_spend_transactions.pop(key, None) + _raise_failed_update_spend_exception( + e=e, start_time=start_time, proxy_logging_obj=proxy_logging_obj + ) + + async def _common_add_spend_log_transaction_to_daily_transaction( + self, + payload: Union[dict, SpendLogsPayload], + prisma_client: PrismaClient, + type: Literal["user", "team"] = "user", + ) -> Optional[BaseDailySpendTransaction]: + common_expected_keys = ["startTime", "api_key", "model", "custom_llm_provider"] + if type == "user": + expected_keys = ["user", *common_expected_keys] + else: + expected_keys = ["team_id", *common_expected_keys] + + if not all(key in payload for key in expected_keys): + verbose_proxy_logger.debug( + f"Missing expected keys: {expected_keys}, in payload, skipping from daily_user_spend_transactions" + ) + return None + + request_status = prisma_client.get_request_status(payload) + verbose_proxy_logger.info(f"Logged request status: {request_status}") + if isinstance(payload["startTime"], datetime): + start_time = payload["startTime"].isoformat() + date = start_time.split("T")[0] + elif isinstance(payload["startTime"], str): + date = payload["startTime"].split("T")[0] + else: + verbose_proxy_logger.debug( + f"Invalid start time: {payload['startTime']}, skipping from daily_user_spend_transactions" + ) + return None + try: + daily_transaction = BaseDailySpendTransaction( + date=date, + api_key=payload["api_key"], + model=payload["model"], + model_group=payload["model_group"], + custom_llm_provider=payload["custom_llm_provider"], + prompt_tokens=payload["prompt_tokens"], + 
completion_tokens=payload["completion_tokens"], + spend=payload["spend"], + api_requests=1, + successful_requests=1 if request_status == "success" else 0, + failed_requests=1 if request_status != "success" else 0, + ) + return daily_transaction + except Exception as e: + raise e + async def add_spend_log_transaction_to_daily_user_transaction( self, payload: Union[dict, SpendLogsPayload], @@ -852,46 +1069,51 @@ class DBSpendUpdateWriter: "prisma_client is None. Skipping writing spend logs to db." ) return - expected_keys = ["user", "startTime", "api_key", "model", "custom_llm_provider"] - if not all(key in payload for key in expected_keys): + base_daily_transaction = ( + await self._common_add_spend_log_transaction_to_daily_transaction( + payload, prisma_client, "user" + ) + ) + if base_daily_transaction is None: + return + + daily_transaction_key = f"{payload['user']}_{base_daily_transaction['date']}_{payload['api_key']}_{payload['model']}_{payload['custom_llm_provider']}" + daily_transaction = DailyUserSpendTransaction( + user_id=payload["user"], **base_daily_transaction + ) + await self.daily_spend_update_queue.add_update( + update={daily_transaction_key: daily_transaction} + ) + + async def add_spend_log_transaction_to_daily_team_transaction( + self, + payload: SpendLogsPayload, + prisma_client: Optional[PrismaClient] = None, + ) -> None: + if prisma_client is None: verbose_proxy_logger.debug( - f"Missing expected keys: {expected_keys}, in payload, skipping from daily_user_spend_transactions" + "prisma_client is None. Skipping writing spend logs to db." ) return - request_status = prisma_client.get_request_status(payload) - verbose_proxy_logger.info(f"Logged request status: {request_status}") - if isinstance(payload["startTime"], datetime): - start_time = payload["startTime"].isoformat() - date = start_time.split("T")[0] - elif isinstance(payload["startTime"], str): - date = payload["startTime"].split("T")[0] - else: + base_daily_transaction = ( + await self._common_add_spend_log_transaction_to_daily_transaction( + payload, prisma_client, "team" + ) + ) + if base_daily_transaction is None: + return + if payload["team_id"] is None: verbose_proxy_logger.debug( - f"Invalid start time: {payload['startTime']}, skipping from daily_user_spend_transactions" + "team_id is None for request. Skipping incrementing team spend." 
) return - try: - daily_transaction_key = f"{payload['user']}_{date}_{payload['api_key']}_{payload['model']}_{payload['custom_llm_provider']}" - daily_transaction = DailyUserSpendTransaction( - user_id=payload["user"], - date=date, - api_key=payload["api_key"], - model=payload["model"], - model_group=payload["model_group"], - custom_llm_provider=payload["custom_llm_provider"], - prompt_tokens=payload["prompt_tokens"], - completion_tokens=payload["completion_tokens"], - spend=payload["spend"], - api_requests=1, - successful_requests=1 if request_status == "success" else 0, - failed_requests=1 if request_status != "success" else 0, - ) - await self.daily_spend_update_queue.add_update( - update={daily_transaction_key: daily_transaction} - ) - - except Exception as e: - raise e + daily_transaction_key = f"{payload['team_id']}_{base_daily_transaction['date']}_{payload['api_key']}_{payload['model']}_{payload['custom_llm_provider']}" + daily_transaction = DailyTeamSpendTransaction( + team_id=payload["team_id"], **base_daily_transaction + ) + await self.daily_team_spend_update_queue.add_update( + update={daily_transaction_key: daily_transaction} + ) diff --git a/litellm/proxy/db/db_transaction_queue/daily_spend_update_queue.py b/litellm/proxy/db/db_transaction_queue/daily_spend_update_queue.py index c61d24d50e..c92b4d5ae7 100644 --- a/litellm/proxy/db/db_transaction_queue/daily_spend_update_queue.py +++ b/litellm/proxy/db/db_transaction_queue/daily_spend_update_queue.py @@ -3,7 +3,7 @@ from copy import deepcopy from typing import Dict, List, Optional from litellm._logging import verbose_proxy_logger -from litellm.proxy._types import DailyUserSpendTransaction +from litellm.proxy._types import BaseDailySpendTransaction from litellm.proxy.db.db_transaction_queue.base_update_queue import ( BaseUpdateQueue, service_logger_obj, @@ -54,10 +54,10 @@ class DailySpendUpdateQueue(BaseUpdateQueue): def __init__(self): super().__init__() self.update_queue: asyncio.Queue[ - Dict[str, DailyUserSpendTransaction] + Dict[str, BaseDailySpendTransaction] ] = asyncio.Queue() - async def add_update(self, update: Dict[str, DailyUserSpendTransaction]): + async def add_update(self, update: Dict[str, BaseDailySpendTransaction]): """Enqueue an update.""" verbose_proxy_logger.debug("Adding update to queue: %s", update) await self.update_queue.put(update) @@ -73,7 +73,7 @@ class DailySpendUpdateQueue(BaseUpdateQueue): This is used to reduce the size of the in-memory queue. """ updates: List[ - Dict[str, DailyUserSpendTransaction] + Dict[str, BaseDailySpendTransaction] ] = await self.flush_all_updates_from_in_memory_queue() aggregated_updates = self.get_aggregated_daily_spend_update_transactions( updates @@ -82,8 +82,8 @@ class DailySpendUpdateQueue(BaseUpdateQueue): async def flush_and_get_aggregated_daily_spend_update_transactions( self, - ) -> Dict[str, DailyUserSpendTransaction]: - """Get all updates from the queue and return all updates aggregated by daily_transaction_key.""" + ) -> Dict[str, BaseDailySpendTransaction]: + """Get all updates from the queue and return all updates aggregated by daily_transaction_key. 
Works for both user and team spend updates.""" updates = await self.flush_all_updates_from_in_memory_queue() aggregated_daily_spend_update_transactions = ( DailySpendUpdateQueue.get_aggregated_daily_spend_update_transactions( @@ -98,11 +98,11 @@ class DailySpendUpdateQueue(BaseUpdateQueue): @staticmethod def get_aggregated_daily_spend_update_transactions( - updates: List[Dict[str, DailyUserSpendTransaction]] - ) -> Dict[str, DailyUserSpendTransaction]: + updates: List[Dict[str, BaseDailySpendTransaction]] + ) -> Dict[str, BaseDailySpendTransaction]: """Aggregate updates by daily_transaction_key.""" aggregated_daily_spend_update_transactions: Dict[ - str, DailyUserSpendTransaction + str, BaseDailySpendTransaction ] = {} for _update in updates: for _key, payload in _update.items(): diff --git a/litellm/proxy/db/db_transaction_queue/redis_update_buffer.py b/litellm/proxy/db/db_transaction_queue/redis_update_buffer.py index 88741fbb18..828778e288 100644 --- a/litellm/proxy/db/db_transaction_queue/redis_update_buffer.py +++ b/litellm/proxy/db/db_transaction_queue/redis_update_buffer.py @@ -6,17 +6,22 @@ This is to prevent deadlocks and improve reliability import asyncio import json -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union, cast from litellm._logging import verbose_proxy_logger from litellm.caching import RedisCache from litellm.constants import ( MAX_REDIS_BUFFER_DEQUEUE_COUNT, REDIS_DAILY_SPEND_UPDATE_BUFFER_KEY, + REDIS_DAILY_TEAM_SPEND_UPDATE_BUFFER_KEY, REDIS_UPDATE_BUFFER_KEY, ) from litellm.litellm_core_utils.safe_json_dumps import safe_dumps -from litellm.proxy._types import DailyUserSpendTransaction, DBSpendUpdateTransactions +from litellm.proxy._types import ( + DailyTeamSpendTransaction, + DailyUserSpendTransaction, + DBSpendUpdateTransactions, +) from litellm.proxy.db.db_transaction_queue.base_update_queue import service_logger_obj from litellm.proxy.db.db_transaction_queue.daily_spend_update_queue import ( DailySpendUpdateQueue, @@ -67,6 +72,7 @@ class RedisUpdateBuffer: self, spend_update_queue: SpendUpdateQueue, daily_spend_update_queue: DailySpendUpdateQueue, + daily_team_spend_update_queue: DailySpendUpdateQueue, ): """ Stores the in-memory spend updates to Redis @@ -127,6 +133,9 @@ class RedisUpdateBuffer: daily_spend_update_transactions = ( await daily_spend_update_queue.flush_and_get_aggregated_daily_spend_update_transactions() ) + daily_team_spend_update_transactions = ( + await daily_team_spend_update_queue.flush_and_get_aggregated_daily_spend_update_transactions() + ) verbose_proxy_logger.debug( "ALL DAILY SPEND UPDATE TRANSACTIONS: %s", daily_spend_update_transactions ) @@ -161,6 +170,19 @@ class RedisUpdateBuffer: service=ServiceTypes.REDIS_DAILY_SPEND_UPDATE_QUEUE, ) + list_of_daily_team_spend_update_transactions = [ + safe_dumps(daily_team_spend_update_transactions) + ] + + current_redis_buffer_size = await self.redis_cache.async_rpush( + key=REDIS_DAILY_TEAM_SPEND_UPDATE_BUFFER_KEY, + values=list_of_daily_team_spend_update_transactions, + ) + await self._emit_new_item_added_to_redis_buffer_event( + queue_size=current_redis_buffer_size, + service=ServiceTypes.REDIS_DAILY_TEAM_SPEND_UPDATE_QUEUE, + ) + @staticmethod def _number_of_transactions_to_store_in_redis( db_spend_update_transactions: DBSpendUpdateTransactions, @@ -258,8 +280,35 @@ class RedisUpdateBuffer: list_of_daily_spend_update_transactions = [ json.loads(transaction) for transaction in list_of_transactions ] - 
return DailySpendUpdateQueue.get_aggregated_daily_spend_update_transactions( - list_of_daily_spend_update_transactions + return cast( + Dict[str, DailyUserSpendTransaction], + DailySpendUpdateQueue.get_aggregated_daily_spend_update_transactions( + list_of_daily_spend_update_transactions + ), + ) + + async def get_all_daily_team_spend_update_transactions_from_redis_buffer( + self, + ) -> Optional[Dict[str, DailyTeamSpendTransaction]]: + """ + Gets all the daily team spend update transactions from Redis + """ + if self.redis_cache is None: + return None + list_of_transactions = await self.redis_cache.async_lpop( + key=REDIS_DAILY_TEAM_SPEND_UPDATE_BUFFER_KEY, + count=MAX_REDIS_BUFFER_DEQUEUE_COUNT, + ) + if list_of_transactions is None: + return None + list_of_daily_spend_update_transactions = [ + json.loads(transaction) for transaction in list_of_transactions + ] + return cast( + Dict[str, DailyTeamSpendTransaction], + DailySpendUpdateQueue.get_aggregated_daily_spend_update_transactions( + list_of_daily_spend_update_transactions + ), ) @staticmethod diff --git a/litellm/proxy/schema.prisma b/litellm/proxy/schema.prisma index b2a6b362cf..2a0f791e25 100644 --- a/litellm/proxy/schema.prisma +++ b/litellm/proxy/schema.prisma @@ -340,6 +340,31 @@ model LiteLLM_DailyUserSpend { @@index([model]) } +// Track daily team spend metrics per model and key +model LiteLLM_DailyTeamSpend { + id String @id @default(uuid()) + team_id String + date String + api_key String + model String + model_group String? + custom_llm_provider String? + prompt_tokens Int @default(0) + completion_tokens Int @default(0) + spend Float @default(0.0) + api_requests Int @default(0) + successful_requests Int @default(0) + failed_requests Int @default(0) + created_at DateTime @default(now()) + updated_at DateTime @updatedAt + + @@unique([team_id, date, api_key, model, custom_llm_provider]) + @@index([date]) + @@index([team_id]) + @@index([api_key]) + @@index([model]) +} + // Track the status of cron jobs running. 
Only allow one pod to run the job at a time model LiteLLM_CronJob { diff --git a/litellm/types/services.py b/litellm/types/services.py index 865827f0f8..6c788c8956 100644 --- a/litellm/types/services.py +++ b/litellm/types/services.py @@ -33,7 +33,7 @@ class ServiceTypes(str, enum.Enum): # daily spend update queue - actual transaction events IN_MEMORY_DAILY_SPEND_UPDATE_QUEUE = "in_memory_daily_spend_update_queue" REDIS_DAILY_SPEND_UPDATE_QUEUE = "redis_daily_spend_update_queue" - + REDIS_DAILY_TEAM_SPEND_UPDATE_QUEUE = "redis_daily_team_spend_update_queue" # spend update queue - current spend of key, user, team IN_MEMORY_SPEND_UPDATE_QUEUE = "in_memory_spend_update_queue" REDIS_SPEND_UPDATE_QUEUE = "redis_spend_update_queue" diff --git a/poetry.lock b/poetry.lock index 19498461d3..a09625d296 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1011,13 +1011,13 @@ grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"] [[package]] name = "google-auth" -version = "2.38.0" +version = "2.39.0" description = "Google Authentication Library" optional = true python-versions = ">=3.7" files = [ - {file = "google_auth-2.38.0-py2.py3-none-any.whl", hash = "sha256:e7dae6694313f434a2727bf2906f27ad259bae090d7aa896590d86feec3d9d4a"}, - {file = "google_auth-2.38.0.tar.gz", hash = "sha256:8285113607d3b80a3f1543b75962447ba8a09fe85783432a784fdeef6ac094c4"}, + {file = "google_auth-2.39.0-py2.py3-none-any.whl", hash = "sha256:0150b6711e97fb9f52fe599f55648950cc4540015565d8fbb31be2ad6e1548a2"}, + {file = "google_auth-2.39.0.tar.gz", hash = "sha256:73222d43cdc35a3aeacbfdcaf73142a97839f10de930550d89ebfe1d0a00cde7"}, ] [package.dependencies] @@ -1026,12 +1026,14 @@ pyasn1-modules = ">=0.2.1" rsa = ">=3.1.4,<5" [package.extras] -aiohttp = ["aiohttp (>=3.6.2,<4.0.0.dev0)", "requests (>=2.20.0,<3.0.0.dev0)"] +aiohttp = ["aiohttp (>=3.6.2,<4.0.0)", "requests (>=2.20.0,<3.0.0)"] enterprise-cert = ["cryptography", "pyopenssl"] -pyjwt = ["cryptography (>=38.0.3)", "pyjwt (>=2.0)"] -pyopenssl = ["cryptography (>=38.0.3)", "pyopenssl (>=20.0.0)"] +pyjwt = ["cryptography (<39.0.0)", "cryptography (>=38.0.3)", "pyjwt (>=2.0)"] +pyopenssl = ["cryptography (<39.0.0)", "cryptography (>=38.0.3)", "pyopenssl (>=20.0.0)"] reauth = ["pyu2f (>=0.1.5)"] -requests = ["requests (>=2.20.0,<3.0.0.dev0)"] +requests = ["requests (>=2.20.0,<3.0.0)"] +testing = ["aiohttp (<3.10.0)", "aiohttp (>=3.6.2,<4.0.0)", "aioresponses", "cryptography (<39.0.0)", "cryptography (>=38.0.3)", "flask", "freezegun", "grpcio", "mock", "oauth2client", "packaging", "pyjwt (>=2.0)", "pyopenssl (<24.3.0)", "pyopenssl (>=20.0.0)", "pytest", "pytest-asyncio", "pytest-cov", "pytest-localserver", "pyu2f (>=0.1.5)", "requests (>=2.20.0,<3.0.0)", "responses", "urllib3"] +urllib3 = ["packaging", "urllib3"] [[package]] name = "google-cloud-kms" @@ -1053,13 +1055,13 @@ protobuf = ">=3.20.2,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4 [[package]] name = "googleapis-common-protos" -version = "1.69.2" +version = "1.70.0" description = "Common protobufs used in Google APIs" optional = true python-versions = ">=3.7" files = [ - {file = "googleapis_common_protos-1.69.2-py3-none-any.whl", hash = "sha256:0b30452ff9c7a27d80bfc5718954063e8ab53dd3697093d3bc99581f5fd24212"}, - {file = "googleapis_common_protos-1.69.2.tar.gz", hash = "sha256:3e1b904a27a33c821b4b749fd31d334c0c9c30e6113023d495e48979a3dc9c5f"}, + {file = "googleapis_common_protos-1.70.0-py3-none-any.whl", hash = "sha256:b8bfcca8c25a2bb253e0e0b0adaf8c00773e5e6af6fd92397576680b807e0fd8"}, + {file = 
"googleapis_common_protos-1.70.0.tar.gz", hash = "sha256:0e1b44e0ea153e6594f9f394fef15193a68aaaea2d843f83e2742717ca753257"}, ] [package.dependencies] @@ -1680,13 +1682,13 @@ referencing = ">=0.31.0" [[package]] name = "litellm-proxy-extras" -version = "0.1.7" +version = "0.1.8" description = "Additional files for the LiteLLM Proxy. Reduces the size of the main litellm package." optional = true python-versions = "!=2.7.*,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,!=3.7.*,>=3.8" files = [ - {file = "litellm_proxy_extras-0.1.7-py3-none-any.whl", hash = "sha256:d07eb1b8827127222c671a4c2a1730975d7e403bb334dbdadb264d64c99c479e"}, - {file = "litellm_proxy_extras-0.1.7.tar.gz", hash = "sha256:d34e4e91edbdac244f51fbfb973fff5a9f23850eff717fbdbdb2af0a9e85ef4a"}, + {file = "litellm_proxy_extras-0.1.8-py3-none-any.whl", hash = "sha256:42f261b66a43bd47a25eee0df547f93e375de208b5cb9da524379626c1632dcb"}, + {file = "litellm_proxy_extras-0.1.8.tar.gz", hash = "sha256:81c18b068184b87eb32088afa50358ac7f27a747d446c949291706bfe8158310"}, ] [[package]] @@ -2180,13 +2182,13 @@ signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"] [[package]] name = "openai" -version = "1.73.0" +version = "1.74.0" description = "The official Python library for the openai API" optional = false python-versions = ">=3.8" files = [ - {file = "openai-1.73.0-py3-none-any.whl", hash = "sha256:f52d1f673fb4ce6069a40d544a80fcb062eba1b3f489004fac4f9923a074c425"}, - {file = "openai-1.73.0.tar.gz", hash = "sha256:b58ea39ba589de07db85c9905557ac12d2fc77600dcd2b92a08b99c9a3dce9e0"}, + {file = "openai-1.74.0-py3-none-any.whl", hash = "sha256:aff3e0f9fb209836382ec112778667027f4fd6ae38bdb2334bc9e173598b092a"}, + {file = "openai-1.74.0.tar.gz", hash = "sha256:592c25b8747a7cad33a841958f5eb859a785caea9ee22b9e4f4a2ec062236526"}, ] [package.dependencies] @@ -3326,13 +3328,13 @@ files = [ [[package]] name = "rq" -version = "2.3.1" +version = "2.3.2" description = "RQ is a simple, lightweight, library for creating background jobs, and processing them." 
optional = true python-versions = ">=3.8" files = [ - {file = "rq-2.3.1-py3-none-any.whl", hash = "sha256:2bbd48b976fdd840865dcab4bed358eb94b4dd8a02e92add75a346a909c1793d"}, - {file = "rq-2.3.1.tar.gz", hash = "sha256:9cb33be7a90c6b36c0d6b9a6524aaf85b8855251ace476d74a076e6dfc5684d6"}, + {file = "rq-2.3.2-py3-none-any.whl", hash = "sha256:bf4dc622a7b9d5f7d4a39444f26d89ce6de8a1d6db61b21060612114dbf8d5ff"}, + {file = "rq-2.3.2.tar.gz", hash = "sha256:5bd212992724428ec1689736abde783d245e7856bca39d89845884f5d580f5f1"}, ] [package.dependencies] @@ -4151,4 +4153,4 @@ proxy = ["PyJWT", "apscheduler", "backoff", "boto3", "cryptography", "fastapi", [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0, !=3.9.7" -content-hash = "35a6b009d763180a0f7e00c95c9dc21bc07f339e5b2f0dd12f14c908cc1dd0df" +content-hash = "37dd81eae90a4d984b90067ddf934dcfa1ef61f45476b13af0e3634dfa309051" diff --git a/pyproject.toml b/pyproject.toml index 256fe563eb..f165b17692 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,7 +55,7 @@ websockets = {version = "^13.1.0", optional = true} boto3 = {version = "1.34.34", optional = true} redisvl = {version = "^0.4.1", optional = true, markers = "python_version >= '3.9' and python_version < '3.14'"} mcp = {version = "1.5.0", optional = true, python = ">=3.10"} -litellm-proxy-extras = {version = "0.1.7", optional = true} +litellm-proxy-extras = {version = "0.1.8", optional = true} [tool.poetry.extras] proxy = [ diff --git a/requirements.txt b/requirements.txt index d585eec373..4758d52f0d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -37,7 +37,7 @@ sentry_sdk==2.21.0 # for sentry error handling detect-secrets==1.5.0 # Enterprise - secret detection / masking in LLM requests cryptography==43.0.1 tzdata==2025.1 # IANA time zone database -litellm-proxy-extras==0.1.7 # for proxy extras - e.g. prisma migrations +litellm-proxy-extras==0.1.8 # for proxy extras - e.g. prisma migrations ### LITELLM PACKAGE DEPENDENCIES python-dotenv==1.0.0 # for env tiktoken==0.8.0 # for calculating usage diff --git a/schema.prisma b/schema.prisma index b2a6b362cf..2a0f791e25 100644 --- a/schema.prisma +++ b/schema.prisma @@ -340,6 +340,31 @@ model LiteLLM_DailyUserSpend { @@index([model]) } +// Track daily team spend metrics per model and key +model LiteLLM_DailyTeamSpend { + id String @id @default(uuid()) + team_id String + date String + api_key String + model String + model_group String? + custom_llm_provider String? + prompt_tokens Int @default(0) + completion_tokens Int @default(0) + spend Float @default(0.0) + api_requests Int @default(0) + successful_requests Int @default(0) + failed_requests Int @default(0) + created_at DateTime @default(now()) + updated_at DateTime @updatedAt + + @@unique([team_id, date, api_key, model, custom_llm_provider]) + @@index([date]) + @@index([team_id]) + @@index([api_key]) + @@index([model]) +} + // Track the status of cron jobs running. 
Only allow one pod to run the job at a time model LiteLLM_CronJob { From 65f8015221d8425369b0139449a779e080280bb0 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 15 Apr 2025 21:08:48 -0700 Subject: [PATCH 12/13] test fix - azure deprecated azure ai mistral --- tests/local_testing/test_completion_cost.py | 23 --------------------- 1 file changed, 23 deletions(-) diff --git a/tests/local_testing/test_completion_cost.py b/tests/local_testing/test_completion_cost.py index af89c38789..3e30041489 100644 --- a/tests/local_testing/test_completion_cost.py +++ b/tests/local_testing/test_completion_cost.py @@ -864,29 +864,6 @@ def test_vertex_ai_embedding_completion_cost(caplog): # assert False - -def test_completion_azure_ai(): - try: - os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True" - litellm.model_cost = litellm.get_model_cost_map(url="") - - litellm.set_verbose = True - response = litellm.completion( - model="azure_ai/Mistral-large-nmefg", - messages=[{"content": "what llm are you", "role": "user"}], - max_tokens=15, - num_retries=3, - api_base=os.getenv("AZURE_AI_MISTRAL_API_BASE"), - api_key=os.getenv("AZURE_AI_MISTRAL_API_KEY"), - ) - print(response) - - assert "response_cost" in response._hidden_params - assert isinstance(response._hidden_params["response_cost"], float) - except Exception as e: - pytest.fail(f"Error occurred: {e}") - - @pytest.mark.parametrize("sync_mode", [True, False]) @pytest.mark.asyncio async def test_completion_cost_hidden_params(sync_mode): From 70d740332fb6d28e32668d0d28e4ca76c9b60e0c Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 15 Apr 2025 21:10:08 -0700 Subject: [PATCH 13/13] [UI Polish] UI fixes for cache control injection settings (#10031) * ui fixes for cache control * docs inject cache control settings --- .../src/components/add_model/advanced_settings.tsx | 12 ++++++------ .../components/add_model/cache_control_settings.tsx | 10 +++++----- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/ui/litellm-dashboard/src/components/add_model/advanced_settings.tsx b/ui/litellm-dashboard/src/components/add_model/advanced_settings.tsx index 7a1cb93f4c..8ced27cd62 100644 --- a/ui/litellm-dashboard/src/components/add_model/advanced_settings.tsx +++ b/ui/litellm-dashboard/src/components/add_model/advanced_settings.tsx @@ -170,12 +170,6 @@ const AdvancedSettings: React.FC = ({ )} - - = ({ className="bg-gray-600" /> + + = ({ {showCacheControl && (
-                        Specify either a role (to cache all messages of that role) or a specific message index.
-                        If both are provided, the index takes precedence.
+                        Providers like Anthropic and the Bedrock API require users to specify where to inject cache control checkpoints;
+                        LiteLLM can automatically add them for you as a cost-saving feature.
                         <div className="flex items-center gap-4 mb-4">
@@ -124,7 +124,7 @@ const CacheControlSettings: React.FC = ({
                     name={[field.name, 'role']}
                     className="mb-0"
                     style={{ width: '180px' }}
-                    tooltip="Select a role to cache all messages of this type"
+                    tooltip="LiteLLM will mark all messages of this role as cacheable"
                   >
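For reference, a rough sketch of the kind of model entry the cache control injection settings above could translate into. This is a hedged illustration only: the cache_control_injection_points parameter name and its location / role / index keys are inferred from the form fields and tooltips in this PR, not confirmed elsewhere in the diff.

# Hypothetical sketch of a model entry produced by the "cache control injection" UI above.
# The parameter name and key names are assumptions inferred from this PR's form fields.
cache_control_model_entry = {
    "model_name": "claude-sonnet",  # placeholder model alias
    "litellm_params": {
        "model": "anthropic/claude-3-5-sonnet",  # placeholder upstream model
        "cache_control_injection_points": [
            # role-based rule: mark every message with this role as cacheable
            {"location": "message", "role": "system"},
            # index-based rule (alternative): target one specific message;
            # per the tooltip this PR replaces, index would take precedence over role
            # {"location": "message", "index": 0},
        ],
    },
}

print(cache_control_model_entry["litellm_params"]["cache_control_injection_points"])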