diff --git a/docs/my-website/docs/proxy/config_settings.md b/docs/my-website/docs/proxy/config_settings.md index 1637366ec1..bd8e2116c2 100644 --- a/docs/my-website/docs/proxy/config_settings.md +++ b/docs/my-website/docs/proxy/config_settings.md @@ -334,7 +334,6 @@ router_settings: | AZURE_STORAGE_TENANT_ID | The Application Tenant ID to use for Authentication to Azure Blob Storage logging | AZURE_STORAGE_CLIENT_ID | The Application Client ID to use for Authentication to Azure Blob Storage logging | AZURE_STORAGE_CLIENT_SECRET | The Application Client Secret to use for Authentication to Azure Blob Storage logging - | BERRISPEND_ACCOUNT_ID | Account ID for BerriSpend service | BRAINTRUST_API_KEY | API key for Braintrust integration | CIRCLE_OIDC_TOKEN | OpenID Connect token for CircleCI diff --git a/docs/my-website/docs/proxy/logging.md b/docs/my-website/docs/proxy/logging.md index e13a403634..c8731dd270 100644 --- a/docs/my-website/docs/proxy/logging.md +++ b/docs/my-website/docs/proxy/logging.md @@ -862,7 +862,7 @@ Add the following to your env ```shell OTEL_EXPORTER="otlp_http" -OTEL_ENDPOINT="http:/0.0.0.0:4317" +OTEL_ENDPOINT="http://0.0.0.0:4317" OTEL_HEADERS="x-honeycomb-team=" # Optional ``` @@ -2501,4 +2501,4 @@ litellm_settings: :::info `thresholds` are not required by default, but you can tune the values to your needs. Default values is `4` for all categories -::: --> \ No newline at end of file +::: --> diff --git a/docs/my-website/release_notes/v1.57.8-stable/index.md b/docs/my-website/release_notes/v1.57.8-stable/index.md index ea712f0f77..78fe13f2ed 100644 --- a/docs/my-website/release_notes/v1.57.8-stable/index.md +++ b/docs/my-website/release_notes/v1.57.8-stable/index.md @@ -38,7 +38,7 @@ hide_table_of_contents: false 2. OpenAI Moderations - `omni-moderation-latest` support. [Start Here](https://docs.litellm.ai/docs/moderation) 3. Azure O1 - fake streaming support. This ensures if a `stream=true` is passed, the response is streamed. [Start Here](https://docs.litellm.ai/docs/providers/azure) 4. Anthropic - non-whitespace char stop sequence handling - [PR](https://github.com/BerriAI/litellm/pull/7484) -5. Azure OpenAI - support Entra id username + password based auth. [Start Here](https://docs.litellm.ai/docs/providers/azure#entrata-id---use-tenant_id-client_id-client_secret) +5. Azure OpenAI - support Entra ID username + password based auth. [Start Here](https://docs.litellm.ai/docs/providers/azure#entra-id---use-tenant_id-client_id-client_secret) 6. LM Studio - embedding route support. [Start Here](https://docs.litellm.ai/docs/providers/lm-studio) 7. WatsonX - ZenAPIKeyAuth support. 
[Start Here](https://docs.litellm.ai/docs/providers/watsonx) diff --git a/litellm-proxy-extras/dist/litellm_proxy_extras-0.1.8-py3-none-any.whl b/litellm-proxy-extras/dist/litellm_proxy_extras-0.1.8-py3-none-any.whl new file mode 100644 index 0000000000..b4a2ca73d2 Binary files /dev/null and b/litellm-proxy-extras/dist/litellm_proxy_extras-0.1.8-py3-none-any.whl differ diff --git a/litellm-proxy-extras/dist/litellm_proxy_extras-0.1.8.tar.gz b/litellm-proxy-extras/dist/litellm_proxy_extras-0.1.8.tar.gz new file mode 100644 index 0000000000..a254112d2b Binary files /dev/null and b/litellm-proxy-extras/dist/litellm_proxy_extras-0.1.8.tar.gz differ diff --git a/litellm-proxy-extras/litellm_proxy_extras/migrations/20250415191926_add_daily_team_table/migration.sql b/litellm-proxy-extras/litellm_proxy_extras/migrations/20250415191926_add_daily_team_table/migration.sql new file mode 100644 index 0000000000..a6eb461bc2 --- /dev/null +++ b/litellm-proxy-extras/litellm_proxy_extras/migrations/20250415191926_add_daily_team_table/migration.sql @@ -0,0 +1,36 @@ +-- CreateTable +CREATE TABLE "LiteLLM_DailyTeamSpend" ( + "id" TEXT NOT NULL, + "team_id" TEXT NOT NULL, + "date" TEXT NOT NULL, + "api_key" TEXT NOT NULL, + "model" TEXT NOT NULL, + "model_group" TEXT, + "custom_llm_provider" TEXT, + "prompt_tokens" INTEGER NOT NULL DEFAULT 0, + "completion_tokens" INTEGER NOT NULL DEFAULT 0, + "spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0, + "api_requests" INTEGER NOT NULL DEFAULT 0, + "successful_requests" INTEGER NOT NULL DEFAULT 0, + "failed_requests" INTEGER NOT NULL DEFAULT 0, + "created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP, + "updated_at" TIMESTAMP(3) NOT NULL, + + CONSTRAINT "LiteLLM_DailyTeamSpend_pkey" PRIMARY KEY ("id") +); + +-- CreateIndex +CREATE INDEX "LiteLLM_DailyTeamSpend_date_idx" ON "LiteLLM_DailyTeamSpend"("date"); + +-- CreateIndex +CREATE INDEX "LiteLLM_DailyTeamSpend_team_id_idx" ON "LiteLLM_DailyTeamSpend"("team_id"); + +-- CreateIndex +CREATE INDEX "LiteLLM_DailyTeamSpend_api_key_idx" ON "LiteLLM_DailyTeamSpend"("api_key"); + +-- CreateIndex +CREATE INDEX "LiteLLM_DailyTeamSpend_model_idx" ON "LiteLLM_DailyTeamSpend"("model"); + +-- CreateIndex +CREATE UNIQUE INDEX "LiteLLM_DailyTeamSpend_team_id_date_api_key_model_custom_ll_key" ON "LiteLLM_DailyTeamSpend"("team_id", "date", "api_key", "model", "custom_llm_provider"); + diff --git a/litellm-proxy-extras/pyproject.toml b/litellm-proxy-extras/pyproject.toml index 8fd40a78ab..58577b8292 100644 --- a/litellm-proxy-extras/pyproject.toml +++ b/litellm-proxy-extras/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm-proxy-extras" -version = "0.1.7" +version = "0.1.8" description = "Additional files for the LiteLLM Proxy. Reduces the size of the main litellm package." 
authors = ["BerriAI"] readme = "README.md" @@ -22,7 +22,7 @@ requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" [tool.commitizen] -version = "0.1.7" +version = "0.1.8" version_files = [ "pyproject.toml:version", "../requirements.txt:litellm-proxy-extras==", diff --git a/litellm/constants.py b/litellm/constants.py index 12bfd17815..0f35520b2e 100644 --- a/litellm/constants.py +++ b/litellm/constants.py @@ -24,6 +24,7 @@ SINGLE_DEPLOYMENT_TRAFFIC_FAILURE_THRESHOLD = 1000 # Minimum number of requests ########### v2 Architecture constants for managing writing updates to the database ########### REDIS_UPDATE_BUFFER_KEY = "litellm_spend_update_buffer" REDIS_DAILY_SPEND_UPDATE_BUFFER_KEY = "litellm_daily_spend_update_buffer" +REDIS_DAILY_TEAM_SPEND_UPDATE_BUFFER_KEY = "litellm_daily_team_spend_update_buffer" MAX_REDIS_BUFFER_DEQUEUE_COUNT = 100 MAX_SIZE_IN_MEMORY_QUEUE = 10000 MAX_IN_MEMORY_QUEUE_FLUSH_COUNT = 1000 diff --git a/litellm/integrations/azure_storage/azure_storage.py b/litellm/integrations/azure_storage/azure_storage.py index 27f5e0e112..24a3f5749a 100644 --- a/litellm/integrations/azure_storage/azure_storage.py +++ b/litellm/integrations/azure_storage/azure_storage.py @@ -8,7 +8,7 @@ from typing import List, Optional from litellm._logging import verbose_logger from litellm.constants import AZURE_STORAGE_MSFT_VERSION from litellm.integrations.custom_batch_logger import CustomBatchLogger -from litellm.llms.azure.common_utils import get_azure_ad_token_from_entrata_id +from litellm.llms.azure.common_utils import get_azure_ad_token_from_entra_id from litellm.llms.custom_httpx.http_handler import ( AsyncHTTPHandler, get_async_httpx_client, @@ -291,7 +291,7 @@ class AzureBlobStorageLogger(CustomBatchLogger): "Missing required environment variable: AZURE_STORAGE_CLIENT_SECRET" ) - token_provider = get_azure_ad_token_from_entrata_id( + token_provider = get_azure_ad_token_from_entra_id( tenant_id=tenant_id, client_id=client_id, client_secret=client_secret, diff --git a/litellm/llms/azure/common_utils.py b/litellm/llms/azure/common_utils.py index e190c8bfd7..012f47c851 100644 --- a/litellm/llms/azure/common_utils.py +++ b/litellm/llms/azure/common_utils.py @@ -61,7 +61,7 @@ def process_azure_headers(headers: Union[httpx.Headers, dict]) -> dict: return {**llm_response_headers, **openai_headers} -def get_azure_ad_token_from_entrata_id( +def get_azure_ad_token_from_entra_id( tenant_id: str, client_id: str, client_secret: str, @@ -81,7 +81,7 @@ def get_azure_ad_token_from_entrata_id( """ from azure.identity import ClientSecretCredential, get_bearer_token_provider - verbose_logger.debug("Getting Azure AD Token from Entrata ID") + verbose_logger.debug("Getting Azure AD Token from Entra ID") if tenant_id.startswith("os.environ/"): _tenant_id = get_secret_str(tenant_id) @@ -324,9 +324,9 @@ class BaseAzureLLM(BaseOpenAILLM): timeout = litellm_params.get("timeout") if not api_key and tenant_id and client_id and client_secret: verbose_logger.debug( - "Using Azure AD Token Provider from Entrata ID for Azure Auth" + "Using Azure AD Token Provider from Entra ID for Azure Auth" ) - azure_ad_token_provider = get_azure_ad_token_from_entrata_id( + azure_ad_token_provider = get_azure_ad_token_from_entra_id( tenant_id=tenant_id, client_id=client_id, client_secret=client_secret, diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py index e25b52e2b8..deb3d799db 100644 --- a/litellm/proxy/_types.py +++ b/litellm/proxy/_types.py @@ -2769,8 +2769,7 @@ class 
DefaultInternalUserParams(LiteLLMPydanticObjectBase): ) -class DailyUserSpendTransaction(TypedDict): - user_id: str +class BaseDailySpendTransaction(TypedDict): date: str api_key: str model: str @@ -2790,6 +2789,14 @@ class DailyUserSpendTransaction(TypedDict): failed_requests: int +class DailyTeamSpendTransaction(BaseDailySpendTransaction): + team_id: str + + +class DailyUserSpendTransaction(BaseDailySpendTransaction): + user_id: str + + class DBSpendUpdateTransactions(TypedDict): """ Internal Data Structure for buffering spend updates in Redis or in memory before committing them to the database diff --git a/litellm/proxy/db/db_spend_update_writer.py b/litellm/proxy/db/db_spend_update_writer.py index f986cf70b4..e6bc0c3b7a 100644 --- a/litellm/proxy/db/db_spend_update_writer.py +++ b/litellm/proxy/db/db_spend_update_writer.py @@ -11,7 +11,7 @@ import os import time import traceback from datetime import datetime, timedelta -from typing import TYPE_CHECKING, Any, Dict, Optional, Union +from typing import TYPE_CHECKING, Any, Dict, Literal, Optional, Union, cast import litellm from litellm._logging import verbose_proxy_logger @@ -19,6 +19,8 @@ from litellm.caching import DualCache, RedisCache from litellm.constants import DB_SPEND_UPDATE_JOB_NAME from litellm.proxy._types import ( DB_CONNECTION_ERROR_TYPES, + BaseDailySpendTransaction, + DailyTeamSpendTransaction, DailyUserSpendTransaction, DBSpendUpdateTransactions, Litellm_EntityType, @@ -58,6 +60,7 @@ class DBSpendUpdateWriter: self.pod_lock_manager = PodLockManager() self.spend_update_queue = SpendUpdateQueue() self.daily_spend_update_queue = DailySpendUpdateQueue() + self.daily_team_spend_update_queue = DailySpendUpdateQueue() async def update_database( # LiteLLM management object fields @@ -160,6 +163,13 @@ class DBSpendUpdateWriter: ) ) + asyncio.create_task( + self.add_spend_log_transaction_to_daily_team_transaction( + payload=payload, + prisma_client=prisma_client, + ) + ) + verbose_proxy_logger.debug("Runs spend update on all tables") except Exception: verbose_proxy_logger.debug( @@ -383,6 +393,7 @@ class DBSpendUpdateWriter: await self.redis_update_buffer.store_in_memory_spend_updates_in_redis( spend_update_queue=self.spend_update_queue, daily_spend_update_queue=self.daily_spend_update_queue, + daily_team_spend_update_queue=self.daily_team_spend_update_queue, ) # Only commit from redis to db if this pod is the leader @@ -413,6 +424,16 @@ class DBSpendUpdateWriter: proxy_logging_obj=proxy_logging_obj, daily_spend_transactions=daily_spend_update_transactions, ) + daily_team_spend_update_transactions = ( + await self.redis_update_buffer.get_all_daily_team_spend_update_transactions_from_redis_buffer() + ) + if daily_team_spend_update_transactions is not None: + await DBSpendUpdateWriter.update_daily_team_spend( + n_retry_times=n_retry_times, + prisma_client=prisma_client, + proxy_logging_obj=proxy_logging_obj, + daily_spend_transactions=daily_team_spend_update_transactions, + ) except Exception as e: verbose_proxy_logger.error(f"Error committing spend updates: {e}") finally: @@ -448,8 +469,9 @@ class DBSpendUpdateWriter: ################## Daily Spend Update Transactions ################## # Aggregate all in memory daily spend transactions and commit to db - daily_spend_update_transactions = ( - await self.daily_spend_update_queue.flush_and_get_aggregated_daily_spend_update_transactions() + daily_spend_update_transactions = cast( + Dict[str, DailyUserSpendTransaction], + await 
self.daily_spend_update_queue.flush_and_get_aggregated_daily_spend_update_transactions(),
         )
 
         await DBSpendUpdateWriter.update_daily_user_spend(
@@ -459,6 +481,20 @@ class DBSpendUpdateWriter:
             daily_spend_transactions=daily_spend_update_transactions,
         )
 
+        ################## Daily Team Spend Update Transactions ##################
+        # Aggregate all in memory daily team spend transactions and commit to db
+        daily_team_spend_update_transactions = cast(
+            Dict[str, DailyTeamSpendTransaction],
+            await self.daily_team_spend_update_queue.flush_and_get_aggregated_daily_spend_update_transactions(),
+        )
+
+        await DBSpendUpdateWriter.update_daily_team_spend(
+            n_retry_times=n_retry_times,
+            prisma_client=prisma_client,
+            proxy_logging_obj=proxy_logging_obj,
+            daily_spend_transactions=daily_team_spend_update_transactions,
+        )
+
     async def _commit_spend_updates_to_db(  # noqa: PLR0915
         self,
         prisma_client: PrismaClient,
@@ -853,6 +889,195 @@ class DBSpendUpdateWriter:
             e=e, start_time=start_time, proxy_logging_obj=proxy_logging_obj
         )
 
+    @staticmethod
+    async def update_daily_team_spend(
+        n_retry_times: int,
+        prisma_client: PrismaClient,
+        proxy_logging_obj: ProxyLogging,
+        daily_spend_transactions: Dict[str, DailyTeamSpendTransaction],
+    ):
+        """
+        Batch job to update LiteLLM_DailyTeamSpend table using in-memory daily_spend_transactions
+        """
+        from litellm.proxy.utils import _raise_failed_update_spend_exception
+
+        ### UPDATE DAILY TEAM SPEND ###
+        verbose_proxy_logger.debug(
+            "Daily Team Spend transactions: {}".format(len(daily_spend_transactions))
+        )
+        BATCH_SIZE = (
+            100  # Number of aggregated records to update in each database operation
+        )
+        start_time = time.time()
+
+        try:
+            for i in range(n_retry_times + 1):
+                try:
+                    # Get transactions to process
+                    transactions_to_process = dict(
+                        list(daily_spend_transactions.items())[:BATCH_SIZE]
+                    )
+
+                    if len(transactions_to_process) == 0:
+                        verbose_proxy_logger.debug(
+                            "No new transactions to process for daily team spend update"
+                        )
+                        break
+
+                    # Update DailyTeamSpend table in batches
+                    async with prisma_client.db.batch_() as batcher:
+                        for _, transaction in transactions_to_process.items():
+                            team_id = transaction.get("team_id")
+                            if not team_id:  # Skip if no team_id
+                                continue
+
+                            batcher.litellm_dailyteamspend.upsert(
+                                where={
+                                    "team_id_date_api_key_model_custom_llm_provider": {
+                                        "team_id": team_id,
+                                        "date": transaction["date"],
+                                        "api_key": transaction["api_key"],
+                                        "model": transaction["model"],
+                                        "custom_llm_provider": transaction.get(
+                                            "custom_llm_provider"
+                                        ),
+                                    }
+                                },
+                                data={
+                                    "create": {
+                                        "team_id": team_id,
+                                        "date": transaction["date"],
+                                        "api_key": transaction["api_key"],
+                                        "model": transaction["model"],
+                                        "model_group": transaction.get("model_group"),
+                                        "custom_llm_provider": transaction.get(
+                                            "custom_llm_provider"
+                                        ),
+                                        "prompt_tokens": transaction["prompt_tokens"],
+                                        "completion_tokens": transaction[
+                                            "completion_tokens"
+                                        ],
+                                        "spend": transaction["spend"],
+                                        "api_requests": transaction["api_requests"],
+                                        "successful_requests": transaction[
+                                            "successful_requests"
+                                        ],
+                                        "failed_requests": transaction[
+                                            "failed_requests"
+                                        ],
+                                    },
+                                    "update": {
+                                        "prompt_tokens": {
+                                            "increment": transaction["prompt_tokens"]
+                                        },
+                                        "completion_tokens": {
+                                            "increment": transaction[
+                                                "completion_tokens"
+                                            ]
+                                        },
+                                        "spend": {"increment": transaction["spend"]},
+                                        "api_requests": {
+                                            "increment": transaction["api_requests"]
+                                        },
+                                        "successful_requests": {
+                                            "increment": transaction[
+                                                "successful_requests"
+                                            ]
+                                        },
+                                        "failed_requests": {
+                                            "increment": transaction["failed_requests"]
+                                        },
+                                    },
+                                },
+                            )
+
+                    verbose_proxy_logger.info(
+                        f"Processed {len(transactions_to_process)} daily team transactions in {time.time() - start_time:.2f}s"
+                    )
+
+                    # Remove processed transactions
+                    for key in transactions_to_process.keys():
+                        daily_spend_transactions.pop(key, None)
+
+                    verbose_proxy_logger.debug(
+                        f"Processed {len(transactions_to_process)} daily team spend transactions in {time.time() - start_time:.2f}s"
+                    )
+                    break
+
+                except DB_CONNECTION_ERROR_TYPES as e:
+                    if i >= n_retry_times:
+                        _raise_failed_update_spend_exception(
+                            e=e,
+                            start_time=start_time,
+                            proxy_logging_obj=proxy_logging_obj,
+                        )
+                    await asyncio.sleep(2**i)  # Exponential backoff
+
+        except Exception as e:
+            # Remove processed transactions even if there was an error
+            if "transactions_to_process" in locals():
+                for key in transactions_to_process.keys():  # type: ignore
+                    daily_spend_transactions.pop(key, None)
+            _raise_failed_update_spend_exception(
+                e=e, start_time=start_time, proxy_logging_obj=proxy_logging_obj
+            )
+
+    async def _common_add_spend_log_transaction_to_daily_transaction(
+        self,
+        payload: Union[dict, SpendLogsPayload],
+        prisma_client: PrismaClient,
+        type: Literal["user", "team"] = "user",
+    ) -> Optional[BaseDailySpendTransaction]:
+        common_expected_keys = ["startTime", "api_key", "model", "custom_llm_provider"]
+        if type == "user":
+            expected_keys = ["user", *common_expected_keys]
+        else:
+            expected_keys = ["team_id", *common_expected_keys]
+
+        if not all(key in payload for key in expected_keys):
+            verbose_proxy_logger.debug(
+                f"Missing expected keys: {expected_keys} in payload, skipping from daily spend transactions"
+            )
+            return None
+
+        request_status = prisma_client.get_request_status(payload)
+        verbose_proxy_logger.info(f"Logged request status: {request_status}")
+        _metadata: SpendLogsMetadata = json.loads(payload["metadata"])
+        usage_obj = _metadata.get("usage_object", {}) or {}
+        if isinstance(payload["startTime"], datetime):
+            start_time = payload["startTime"].isoformat()
+            date = start_time.split("T")[0]
+        elif isinstance(payload["startTime"], str):
+            date = payload["startTime"].split("T")[0]
+        else:
+            verbose_proxy_logger.debug(
+                f"Invalid start time: {payload['startTime']}, skipping from daily spend transactions"
+            )
+            return None
+        try:
+            daily_transaction = BaseDailySpendTransaction(
+                date=date,
+                api_key=payload["api_key"],
+                model=payload["model"],
+                model_group=payload["model_group"],
+                custom_llm_provider=payload["custom_llm_provider"],
+                prompt_tokens=payload["prompt_tokens"],
+                completion_tokens=payload["completion_tokens"],
+                spend=payload["spend"],
+                api_requests=1,
+                successful_requests=1 if request_status == "success" else 0,
+                failed_requests=1 if request_status != "success" else 0,
+                cache_read_input_tokens=usage_obj.get("cache_read_input_tokens", 0)
+                or 0,
+                cache_creation_input_tokens=usage_obj.get(
+                    "cache_creation_input_tokens", 0
+                )
+                or 0,
+            )
+            return daily_transaction
+        except Exception as e:
+            raise e
+
     async def add_spend_log_transaction_to_daily_user_transaction(
         self,
         payload: Union[dict, SpendLogsPayload],
@@ -870,55 +1095,51 @@ class DBSpendUpdateWriter:
                 "prisma_client is None. Skipping writing spend logs to db."
) return - expected_keys = ["user", "startTime", "api_key", "model", "custom_llm_provider"] - if not all(key in payload for key in expected_keys): - verbose_proxy_logger.debug( - f"Missing expected keys: {expected_keys}, in payload, skipping from daily_user_spend_transactions" + base_daily_transaction = ( + await self._common_add_spend_log_transaction_to_daily_transaction( + payload, prisma_client, "user" ) + ) + if base_daily_transaction is None: return - request_status = prisma_client.get_request_status(payload) - verbose_proxy_logger.info(f"Logged request status: {request_status}") - _metadata: SpendLogsMetadata = json.loads(payload["metadata"]) - usage_obj = _metadata.get("usage_object", {}) or {} - cache_read_input_tokens = usage_obj.get("cache_read_input_tokens", 0) or 0 - cache_creation_input_tokens = ( - usage_obj.get("cache_creation_input_tokens", 0) or 0 + daily_transaction_key = f"{payload['user']}_{base_daily_transaction['date']}_{payload['api_key']}_{payload['model']}_{payload['custom_llm_provider']}" + daily_transaction = DailyUserSpendTransaction( + user_id=payload["user"], **base_daily_transaction + ) + await self.daily_spend_update_queue.add_update( + update={daily_transaction_key: daily_transaction} ) - if isinstance(payload["startTime"], datetime): - start_time = payload["startTime"].isoformat() - date = start_time.split("T")[0] - elif isinstance(payload["startTime"], str): - date = payload["startTime"].split("T")[0] - else: + async def add_spend_log_transaction_to_daily_team_transaction( + self, + payload: SpendLogsPayload, + prisma_client: Optional[PrismaClient] = None, + ) -> None: + if prisma_client is None: verbose_proxy_logger.debug( - f"Invalid start time: {payload['startTime']}, skipping from daily_user_spend_transactions" + "prisma_client is None. Skipping writing spend logs to db." ) return - try: - daily_transaction_key = f"{payload['user']}_{date}_{payload['api_key']}_{payload['model']}_{payload['custom_llm_provider']}" - daily_transaction = DailyUserSpendTransaction( - user_id=payload["user"], - date=date, - api_key=payload["api_key"], - model=payload["model"], - model_group=payload["model_group"], - custom_llm_provider=payload["custom_llm_provider"], - prompt_tokens=payload["prompt_tokens"], - completion_tokens=payload["completion_tokens"], - spend=payload["spend"], - api_requests=1, - successful_requests=1 if request_status == "success" else 0, - failed_requests=1 if request_status != "success" else 0, - cache_read_input_tokens=cache_read_input_tokens, - cache_creation_input_tokens=cache_creation_input_tokens, - ) - await self.daily_spend_update_queue.add_update( - update={daily_transaction_key: daily_transaction} + base_daily_transaction = ( + await self._common_add_spend_log_transaction_to_daily_transaction( + payload, prisma_client, "team" ) + ) + if base_daily_transaction is None: + return + if payload["team_id"] is None: + verbose_proxy_logger.debug( + "team_id is None for request. Skipping incrementing team spend." 
+ ) + return - except Exception as e: - raise e + daily_transaction_key = f"{payload['team_id']}_{base_daily_transaction['date']}_{payload['api_key']}_{payload['model']}_{payload['custom_llm_provider']}" + daily_transaction = DailyTeamSpendTransaction( + team_id=payload["team_id"], **base_daily_transaction + ) + await self.daily_team_spend_update_queue.add_update( + update={daily_transaction_key: daily_transaction} + ) diff --git a/litellm/proxy/db/db_transaction_queue/daily_spend_update_queue.py b/litellm/proxy/db/db_transaction_queue/daily_spend_update_queue.py index 191b577897..c3074e641b 100644 --- a/litellm/proxy/db/db_transaction_queue/daily_spend_update_queue.py +++ b/litellm/proxy/db/db_transaction_queue/daily_spend_update_queue.py @@ -3,7 +3,7 @@ from copy import deepcopy from typing import Dict, List, Optional from litellm._logging import verbose_proxy_logger -from litellm.proxy._types import DailyUserSpendTransaction +from litellm.proxy._types import BaseDailySpendTransaction from litellm.proxy.db.db_transaction_queue.base_update_queue import ( BaseUpdateQueue, service_logger_obj, @@ -53,11 +53,11 @@ class DailySpendUpdateQueue(BaseUpdateQueue): def __init__(self): super().__init__() - self.update_queue: asyncio.Queue[Dict[str, DailyUserSpendTransaction]] = ( + self.update_queue: asyncio.Queue[Dict[str, BaseDailySpendTransaction]] = ( asyncio.Queue() ) - async def add_update(self, update: Dict[str, DailyUserSpendTransaction]): + async def add_update(self, update: Dict[str, BaseDailySpendTransaction]): """Enqueue an update.""" verbose_proxy_logger.debug("Adding update to queue: %s", update) await self.update_queue.put(update) @@ -72,7 +72,7 @@ class DailySpendUpdateQueue(BaseUpdateQueue): Combine all updates in the queue into a single update. This is used to reduce the size of the in-memory queue. """ - updates: List[Dict[str, DailyUserSpendTransaction]] = ( + updates: List[Dict[str, BaseDailySpendTransaction]] = ( await self.flush_all_updates_from_in_memory_queue() ) aggregated_updates = self.get_aggregated_daily_spend_update_transactions( @@ -82,8 +82,8 @@ class DailySpendUpdateQueue(BaseUpdateQueue): async def flush_and_get_aggregated_daily_spend_update_transactions( self, - ) -> Dict[str, DailyUserSpendTransaction]: - """Get all updates from the queue and return all updates aggregated by daily_transaction_key.""" + ) -> Dict[str, BaseDailySpendTransaction]: + """Get all updates from the queue and return all updates aggregated by daily_transaction_key. 
Works for both user and team spend updates.""" updates = await self.flush_all_updates_from_in_memory_queue() aggregated_daily_spend_update_transactions = ( DailySpendUpdateQueue.get_aggregated_daily_spend_update_transactions( @@ -98,11 +98,11 @@ class DailySpendUpdateQueue(BaseUpdateQueue): @staticmethod def get_aggregated_daily_spend_update_transactions( - updates: List[Dict[str, DailyUserSpendTransaction]], - ) -> Dict[str, DailyUserSpendTransaction]: + updates: List[Dict[str, BaseDailySpendTransaction]], + ) -> Dict[str, BaseDailySpendTransaction]: """Aggregate updates by daily_transaction_key.""" aggregated_daily_spend_update_transactions: Dict[ - str, DailyUserSpendTransaction + str, BaseDailySpendTransaction ] = {} for _update in updates: for _key, payload in _update.items(): diff --git a/litellm/proxy/db/db_transaction_queue/redis_update_buffer.py b/litellm/proxy/db/db_transaction_queue/redis_update_buffer.py index 88741fbb18..828778e288 100644 --- a/litellm/proxy/db/db_transaction_queue/redis_update_buffer.py +++ b/litellm/proxy/db/db_transaction_queue/redis_update_buffer.py @@ -6,17 +6,22 @@ This is to prevent deadlocks and improve reliability import asyncio import json -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union, cast from litellm._logging import verbose_proxy_logger from litellm.caching import RedisCache from litellm.constants import ( MAX_REDIS_BUFFER_DEQUEUE_COUNT, REDIS_DAILY_SPEND_UPDATE_BUFFER_KEY, + REDIS_DAILY_TEAM_SPEND_UPDATE_BUFFER_KEY, REDIS_UPDATE_BUFFER_KEY, ) from litellm.litellm_core_utils.safe_json_dumps import safe_dumps -from litellm.proxy._types import DailyUserSpendTransaction, DBSpendUpdateTransactions +from litellm.proxy._types import ( + DailyTeamSpendTransaction, + DailyUserSpendTransaction, + DBSpendUpdateTransactions, +) from litellm.proxy.db.db_transaction_queue.base_update_queue import service_logger_obj from litellm.proxy.db.db_transaction_queue.daily_spend_update_queue import ( DailySpendUpdateQueue, @@ -67,6 +72,7 @@ class RedisUpdateBuffer: self, spend_update_queue: SpendUpdateQueue, daily_spend_update_queue: DailySpendUpdateQueue, + daily_team_spend_update_queue: DailySpendUpdateQueue, ): """ Stores the in-memory spend updates to Redis @@ -127,6 +133,9 @@ class RedisUpdateBuffer: daily_spend_update_transactions = ( await daily_spend_update_queue.flush_and_get_aggregated_daily_spend_update_transactions() ) + daily_team_spend_update_transactions = ( + await daily_team_spend_update_queue.flush_and_get_aggregated_daily_spend_update_transactions() + ) verbose_proxy_logger.debug( "ALL DAILY SPEND UPDATE TRANSACTIONS: %s", daily_spend_update_transactions ) @@ -161,6 +170,19 @@ class RedisUpdateBuffer: service=ServiceTypes.REDIS_DAILY_SPEND_UPDATE_QUEUE, ) + list_of_daily_team_spend_update_transactions = [ + safe_dumps(daily_team_spend_update_transactions) + ] + + current_redis_buffer_size = await self.redis_cache.async_rpush( + key=REDIS_DAILY_TEAM_SPEND_UPDATE_BUFFER_KEY, + values=list_of_daily_team_spend_update_transactions, + ) + await self._emit_new_item_added_to_redis_buffer_event( + queue_size=current_redis_buffer_size, + service=ServiceTypes.REDIS_DAILY_TEAM_SPEND_UPDATE_QUEUE, + ) + @staticmethod def _number_of_transactions_to_store_in_redis( db_spend_update_transactions: DBSpendUpdateTransactions, @@ -258,8 +280,35 @@ class RedisUpdateBuffer: list_of_daily_spend_update_transactions = [ json.loads(transaction) for transaction in list_of_transactions ] - 
return DailySpendUpdateQueue.get_aggregated_daily_spend_update_transactions( - list_of_daily_spend_update_transactions + return cast( + Dict[str, DailyUserSpendTransaction], + DailySpendUpdateQueue.get_aggregated_daily_spend_update_transactions( + list_of_daily_spend_update_transactions + ), + ) + + async def get_all_daily_team_spend_update_transactions_from_redis_buffer( + self, + ) -> Optional[Dict[str, DailyTeamSpendTransaction]]: + """ + Gets all the daily team spend update transactions from Redis + """ + if self.redis_cache is None: + return None + list_of_transactions = await self.redis_cache.async_lpop( + key=REDIS_DAILY_TEAM_SPEND_UPDATE_BUFFER_KEY, + count=MAX_REDIS_BUFFER_DEQUEUE_COUNT, + ) + if list_of_transactions is None: + return None + list_of_daily_spend_update_transactions = [ + json.loads(transaction) for transaction in list_of_transactions + ] + return cast( + Dict[str, DailyTeamSpendTransaction], + DailySpendUpdateQueue.get_aggregated_daily_spend_update_transactions( + list_of_daily_spend_update_transactions + ), ) @staticmethod diff --git a/litellm/proxy/schema.prisma b/litellm/proxy/schema.prisma index b470eba64e..845f05f14b 100644 --- a/litellm/proxy/schema.prisma +++ b/litellm/proxy/schema.prisma @@ -342,6 +342,31 @@ model LiteLLM_DailyUserSpend { @@index([model]) } +// Track daily team spend metrics per model and key +model LiteLLM_DailyTeamSpend { + id String @id @default(uuid()) + team_id String + date String + api_key String + model String + model_group String? + custom_llm_provider String? + prompt_tokens Int @default(0) + completion_tokens Int @default(0) + spend Float @default(0.0) + api_requests Int @default(0) + successful_requests Int @default(0) + failed_requests Int @default(0) + created_at DateTime @default(now()) + updated_at DateTime @updatedAt + + @@unique([team_id, date, api_key, model, custom_llm_provider]) + @@index([date]) + @@index([team_id]) + @@index([api_key]) + @@index([model]) +} + // Track the status of cron jobs running. 
Only allow one pod to run the job at a time model LiteLLM_CronJob { diff --git a/litellm/types/services.py b/litellm/types/services.py index 865827f0f8..6c788c8956 100644 --- a/litellm/types/services.py +++ b/litellm/types/services.py @@ -33,7 +33,7 @@ class ServiceTypes(str, enum.Enum): # daily spend update queue - actual transaction events IN_MEMORY_DAILY_SPEND_UPDATE_QUEUE = "in_memory_daily_spend_update_queue" REDIS_DAILY_SPEND_UPDATE_QUEUE = "redis_daily_spend_update_queue" - + REDIS_DAILY_TEAM_SPEND_UPDATE_QUEUE = "redis_daily_team_spend_update_queue" # spend update queue - current spend of key, user, team IN_MEMORY_SPEND_UPDATE_QUEUE = "in_memory_spend_update_queue" REDIS_SPEND_UPDATE_QUEUE = "redis_spend_update_queue" diff --git a/poetry.lock b/poetry.lock index 19498461d3..a09625d296 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1011,13 +1011,13 @@ grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"] [[package]] name = "google-auth" -version = "2.38.0" +version = "2.39.0" description = "Google Authentication Library" optional = true python-versions = ">=3.7" files = [ - {file = "google_auth-2.38.0-py2.py3-none-any.whl", hash = "sha256:e7dae6694313f434a2727bf2906f27ad259bae090d7aa896590d86feec3d9d4a"}, - {file = "google_auth-2.38.0.tar.gz", hash = "sha256:8285113607d3b80a3f1543b75962447ba8a09fe85783432a784fdeef6ac094c4"}, + {file = "google_auth-2.39.0-py2.py3-none-any.whl", hash = "sha256:0150b6711e97fb9f52fe599f55648950cc4540015565d8fbb31be2ad6e1548a2"}, + {file = "google_auth-2.39.0.tar.gz", hash = "sha256:73222d43cdc35a3aeacbfdcaf73142a97839f10de930550d89ebfe1d0a00cde7"}, ] [package.dependencies] @@ -1026,12 +1026,14 @@ pyasn1-modules = ">=0.2.1" rsa = ">=3.1.4,<5" [package.extras] -aiohttp = ["aiohttp (>=3.6.2,<4.0.0.dev0)", "requests (>=2.20.0,<3.0.0.dev0)"] +aiohttp = ["aiohttp (>=3.6.2,<4.0.0)", "requests (>=2.20.0,<3.0.0)"] enterprise-cert = ["cryptography", "pyopenssl"] -pyjwt = ["cryptography (>=38.0.3)", "pyjwt (>=2.0)"] -pyopenssl = ["cryptography (>=38.0.3)", "pyopenssl (>=20.0.0)"] +pyjwt = ["cryptography (<39.0.0)", "cryptography (>=38.0.3)", "pyjwt (>=2.0)"] +pyopenssl = ["cryptography (<39.0.0)", "cryptography (>=38.0.3)", "pyopenssl (>=20.0.0)"] reauth = ["pyu2f (>=0.1.5)"] -requests = ["requests (>=2.20.0,<3.0.0.dev0)"] +requests = ["requests (>=2.20.0,<3.0.0)"] +testing = ["aiohttp (<3.10.0)", "aiohttp (>=3.6.2,<4.0.0)", "aioresponses", "cryptography (<39.0.0)", "cryptography (>=38.0.3)", "flask", "freezegun", "grpcio", "mock", "oauth2client", "packaging", "pyjwt (>=2.0)", "pyopenssl (<24.3.0)", "pyopenssl (>=20.0.0)", "pytest", "pytest-asyncio", "pytest-cov", "pytest-localserver", "pyu2f (>=0.1.5)", "requests (>=2.20.0,<3.0.0)", "responses", "urllib3"] +urllib3 = ["packaging", "urllib3"] [[package]] name = "google-cloud-kms" @@ -1053,13 +1055,13 @@ protobuf = ">=3.20.2,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4 [[package]] name = "googleapis-common-protos" -version = "1.69.2" +version = "1.70.0" description = "Common protobufs used in Google APIs" optional = true python-versions = ">=3.7" files = [ - {file = "googleapis_common_protos-1.69.2-py3-none-any.whl", hash = "sha256:0b30452ff9c7a27d80bfc5718954063e8ab53dd3697093d3bc99581f5fd24212"}, - {file = "googleapis_common_protos-1.69.2.tar.gz", hash = "sha256:3e1b904a27a33c821b4b749fd31d334c0c9c30e6113023d495e48979a3dc9c5f"}, + {file = "googleapis_common_protos-1.70.0-py3-none-any.whl", hash = "sha256:b8bfcca8c25a2bb253e0e0b0adaf8c00773e5e6af6fd92397576680b807e0fd8"}, + {file = 
"googleapis_common_protos-1.70.0.tar.gz", hash = "sha256:0e1b44e0ea153e6594f9f394fef15193a68aaaea2d843f83e2742717ca753257"}, ] [package.dependencies] @@ -1680,13 +1682,13 @@ referencing = ">=0.31.0" [[package]] name = "litellm-proxy-extras" -version = "0.1.7" +version = "0.1.8" description = "Additional files for the LiteLLM Proxy. Reduces the size of the main litellm package." optional = true python-versions = "!=2.7.*,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,!=3.7.*,>=3.8" files = [ - {file = "litellm_proxy_extras-0.1.7-py3-none-any.whl", hash = "sha256:d07eb1b8827127222c671a4c2a1730975d7e403bb334dbdadb264d64c99c479e"}, - {file = "litellm_proxy_extras-0.1.7.tar.gz", hash = "sha256:d34e4e91edbdac244f51fbfb973fff5a9f23850eff717fbdbdb2af0a9e85ef4a"}, + {file = "litellm_proxy_extras-0.1.8-py3-none-any.whl", hash = "sha256:42f261b66a43bd47a25eee0df547f93e375de208b5cb9da524379626c1632dcb"}, + {file = "litellm_proxy_extras-0.1.8.tar.gz", hash = "sha256:81c18b068184b87eb32088afa50358ac7f27a747d446c949291706bfe8158310"}, ] [[package]] @@ -2180,13 +2182,13 @@ signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"] [[package]] name = "openai" -version = "1.73.0" +version = "1.74.0" description = "The official Python library for the openai API" optional = false python-versions = ">=3.8" files = [ - {file = "openai-1.73.0-py3-none-any.whl", hash = "sha256:f52d1f673fb4ce6069a40d544a80fcb062eba1b3f489004fac4f9923a074c425"}, - {file = "openai-1.73.0.tar.gz", hash = "sha256:b58ea39ba589de07db85c9905557ac12d2fc77600dcd2b92a08b99c9a3dce9e0"}, + {file = "openai-1.74.0-py3-none-any.whl", hash = "sha256:aff3e0f9fb209836382ec112778667027f4fd6ae38bdb2334bc9e173598b092a"}, + {file = "openai-1.74.0.tar.gz", hash = "sha256:592c25b8747a7cad33a841958f5eb859a785caea9ee22b9e4f4a2ec062236526"}, ] [package.dependencies] @@ -3326,13 +3328,13 @@ files = [ [[package]] name = "rq" -version = "2.3.1" +version = "2.3.2" description = "RQ is a simple, lightweight, library for creating background jobs, and processing them." 
optional = true python-versions = ">=3.8" files = [ - {file = "rq-2.3.1-py3-none-any.whl", hash = "sha256:2bbd48b976fdd840865dcab4bed358eb94b4dd8a02e92add75a346a909c1793d"}, - {file = "rq-2.3.1.tar.gz", hash = "sha256:9cb33be7a90c6b36c0d6b9a6524aaf85b8855251ace476d74a076e6dfc5684d6"}, + {file = "rq-2.3.2-py3-none-any.whl", hash = "sha256:bf4dc622a7b9d5f7d4a39444f26d89ce6de8a1d6db61b21060612114dbf8d5ff"}, + {file = "rq-2.3.2.tar.gz", hash = "sha256:5bd212992724428ec1689736abde783d245e7856bca39d89845884f5d580f5f1"}, ] [package.dependencies] @@ -4151,4 +4153,4 @@ proxy = ["PyJWT", "apscheduler", "backoff", "boto3", "cryptography", "fastapi", [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0, !=3.9.7" -content-hash = "35a6b009d763180a0f7e00c95c9dc21bc07f339e5b2f0dd12f14c908cc1dd0df" +content-hash = "37dd81eae90a4d984b90067ddf934dcfa1ef61f45476b13af0e3634dfa309051" diff --git a/pyproject.toml b/pyproject.toml index 92066d0105..f165b17692 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "1.66.1" +version = "1.66.2" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT" @@ -55,7 +55,7 @@ websockets = {version = "^13.1.0", optional = true} boto3 = {version = "1.34.34", optional = true} redisvl = {version = "^0.4.1", optional = true, markers = "python_version >= '3.9' and python_version < '3.14'"} mcp = {version = "1.5.0", optional = true, python = ">=3.10"} -litellm-proxy-extras = {version = "0.1.7", optional = true} +litellm-proxy-extras = {version = "0.1.8", optional = true} [tool.poetry.extras] proxy = [ @@ -118,7 +118,7 @@ requires = ["poetry-core", "wheel"] build-backend = "poetry.core.masonry.api" [tool.commitizen] -version = "1.66.1" +version = "1.66.2" version_files = [ "pyproject.toml:^version" ] diff --git a/requirements.txt b/requirements.txt index d585eec373..4758d52f0d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -37,7 +37,7 @@ sentry_sdk==2.21.0 # for sentry error handling detect-secrets==1.5.0 # Enterprise - secret detection / masking in LLM requests cryptography==43.0.1 tzdata==2025.1 # IANA time zone database -litellm-proxy-extras==0.1.7 # for proxy extras - e.g. prisma migrations +litellm-proxy-extras==0.1.8 # for proxy extras - e.g. prisma migrations ### LITELLM PACKAGE DEPENDENCIES python-dotenv==1.0.0 # for env tiktoken==0.8.0 # for calculating usage diff --git a/schema.prisma b/schema.prisma index b470eba64e..845f05f14b 100644 --- a/schema.prisma +++ b/schema.prisma @@ -342,6 +342,31 @@ model LiteLLM_DailyUserSpend { @@index([model]) } +// Track daily team spend metrics per model and key +model LiteLLM_DailyTeamSpend { + id String @id @default(uuid()) + team_id String + date String + api_key String + model String + model_group String? + custom_llm_provider String? + prompt_tokens Int @default(0) + completion_tokens Int @default(0) + spend Float @default(0.0) + api_requests Int @default(0) + successful_requests Int @default(0) + failed_requests Int @default(0) + created_at DateTime @default(now()) + updated_at DateTime @updatedAt + + @@unique([team_id, date, api_key, model, custom_llm_provider]) + @@index([date]) + @@index([team_id]) + @@index([api_key]) + @@index([model]) +} + // Track the status of cron jobs running. 
Only allow one pod to run the job at a time model LiteLLM_CronJob { diff --git a/tests/litellm/llms/azure/test_azure_common_utils.py b/tests/litellm/llms/azure/test_azure_common_utils.py index bbd2a282fc..42b5903ee8 100644 --- a/tests/litellm/llms/azure/test_azure_common_utils.py +++ b/tests/litellm/llms/azure/test_azure_common_utils.py @@ -19,8 +19,8 @@ from litellm.types.utils import CallTypes @pytest.fixture def setup_mocks(): with patch( - "litellm.llms.azure.common_utils.get_azure_ad_token_from_entrata_id" - ) as mock_entrata_token, patch( + "litellm.llms.azure.common_utils.get_azure_ad_token_from_entra_id" + ) as mock_entra_token, patch( "litellm.llms.azure.common_utils.get_azure_ad_token_from_username_password" ) as mock_username_password_token, patch( "litellm.llms.azure.common_utils.get_azure_ad_token_from_oidc" @@ -37,7 +37,7 @@ def setup_mocks(): mock_litellm.AZURE_DEFAULT_API_VERSION = "2023-05-15" mock_litellm.enable_azure_ad_token_refresh = False - mock_entrata_token.return_value = lambda: "mock-entrata-token" + mock_entra_token.return_value = lambda: "mock-entra-token" mock_username_password_token.return_value = ( lambda: "mock-username-password-token" ) @@ -49,7 +49,7 @@ def setup_mocks(): ) yield { - "entrata_token": mock_entrata_token, + "entra_token": mock_entra_token, "username_password_token": mock_username_password_token, "oidc_token": mock_oidc_token, "token_provider": mock_token_provider, @@ -92,8 +92,8 @@ def test_initialize_with_tenant_credentials_env_var(setup_mocks, monkeypatch): is_async=False, ) - # Verify that get_azure_ad_token_from_entrata_id was called - setup_mocks["entrata_token"].assert_called_once_with( + # Verify that get_azure_ad_token_from_entra_id was called + setup_mocks["entra_token"].assert_called_once_with( tenant_id="test-tenant-id", client_id="test-client-id", client_secret="test-client-secret", @@ -120,8 +120,8 @@ def test_initialize_with_tenant_credentials(setup_mocks): is_async=False, ) - # Verify that get_azure_ad_token_from_entrata_id was called - setup_mocks["entrata_token"].assert_called_once_with( + # Verify that get_azure_ad_token_from_entra_id was called + setup_mocks["entra_token"].assert_called_once_with( tenant_id="test-tenant-id", client_id="test-client-id", client_secret="test-client-secret", diff --git a/tests/local_testing/test_completion_cost.py b/tests/local_testing/test_completion_cost.py index af89c38789..3e30041489 100644 --- a/tests/local_testing/test_completion_cost.py +++ b/tests/local_testing/test_completion_cost.py @@ -864,29 +864,6 @@ def test_vertex_ai_embedding_completion_cost(caplog): # assert False - -def test_completion_azure_ai(): - try: - os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True" - litellm.model_cost = litellm.get_model_cost_map(url="") - - litellm.set_verbose = True - response = litellm.completion( - model="azure_ai/Mistral-large-nmefg", - messages=[{"content": "what llm are you", "role": "user"}], - max_tokens=15, - num_retries=3, - api_base=os.getenv("AZURE_AI_MISTRAL_API_BASE"), - api_key=os.getenv("AZURE_AI_MISTRAL_API_KEY"), - ) - print(response) - - assert "response_cost" in response._hidden_params - assert isinstance(response._hidden_params["response_cost"], float) - except Exception as e: - pytest.fail(f"Error occurred: {e}") - - @pytest.mark.parametrize("sync_mode", [True, False]) @pytest.mark.asyncio async def test_completion_cost_hidden_params(sync_mode): diff --git a/ui/litellm-dashboard/package-lock.json b/ui/litellm-dashboard/package-lock.json index 18c2eaeace..6d38a7d70b 100644 --- 
a/ui/litellm-dashboard/package-lock.json +++ b/ui/litellm-dashboard/package-lock.json @@ -133,9 +133,10 @@ } }, "node_modules/@babel/runtime": { - "version": "7.23.9", - "resolved": "https://registry.npmjs.org/@babel/runtime/-/runtime-7.23.9.tgz", - "integrity": "sha512-0CX6F+BI2s9dkUqr08KFrAIZgNFj75rdBU/DjCyYLIaV/quFjkk6T+EJ2LkZHyZTbEV4L5p97mNkUsHl2wLFAw==", + "version": "7.27.0", + "resolved": "https://registry.npmjs.org/@babel/runtime/-/runtime-7.27.0.tgz", + "integrity": "sha512-VtPOkrdPHZsKc/clNqyi9WUA8TINkZ4cGk63UUE3u4pmB2k+ZMQRDuIOagv8UVd6j7k0T3+RRIb7beKTebNbcw==", + "license": "MIT", "dependencies": { "regenerator-runtime": "^0.14.0" }, diff --git a/ui/litellm-dashboard/src/components/add_model/advanced_settings.tsx b/ui/litellm-dashboard/src/components/add_model/advanced_settings.tsx index 7a1cb93f4c..8ced27cd62 100644 --- a/ui/litellm-dashboard/src/components/add_model/advanced_settings.tsx +++ b/ui/litellm-dashboard/src/components/add_model/advanced_settings.tsx @@ -170,12 +170,6 @@ const AdvancedSettings: React.FC = ({ )} - - = ({ className="bg-gray-600" /> + + = ({ return ( <> = ({ {showCacheControl && (
- Specify either a role (to cache all messages of that role) or a specific message index. - If both are provided, the index takes precedence. + Providers like Anthropic and Bedrock require users to specify where to inject cache control checkpoints; + LiteLLM can add them automatically for you as a cost-saving feature. = ({ name={[field.name, 'role']} className="mb-0" style={{ width: '180px' }} - tooltip="Select a role to cache all messages of this type" + tooltip="LiteLLM will mark all messages of this role as cacheable" >
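
---

A note for reviewers on the aggregation semantics this diff relies on: spend log payloads are bucketed under a composite key (`{team_id}_{date}_{api_key}_{model}_{custom_llm_provider}` for the team path), merged in memory per pod, merged again after batches are popped from the Redis buffer, and finally applied to `LiteLLM_DailyTeamSpend` through the batched upsert's `{"increment": ...}` clauses. Below is a minimal, self-contained sketch of those merge semantics — `make_key` and `merge` are illustrative helpers written for this note, not functions from this diff:

```python
from typing import Dict

NUMERIC_FIELDS = (
    "prompt_tokens", "completion_tokens", "spend",
    "api_requests", "successful_requests", "failed_requests",
)

def make_key(txn: dict) -> str:
    # Mirrors the daily_transaction_key format built in db_spend_update_writer.py
    return "_".join(
        str(txn[f])
        for f in ("team_id", "date", "api_key", "model", "custom_llm_provider")
    )

def merge(agg: Dict[str, dict], update: Dict[str, dict]) -> None:
    """Fold one queued update into the aggregate. Numeric fields accumulate,
    matching the "increment" clauses the Prisma upsert later applies."""
    for key, txn in update.items():
        if key not in agg:
            agg[key] = dict(txn)
        else:
            for field in NUMERIC_FIELDS:
                agg[key][field] += txn[field]

if __name__ == "__main__":
    txn = {
        "team_id": "team-1", "date": "2025-04-15", "api_key": "hashed-key",
        "model": "gpt-4o", "custom_llm_provider": "openai",
        "prompt_tokens": 10, "completion_tokens": 5, "spend": 0.001,
        "api_requests": 1, "successful_requests": 1, "failed_requests": 0,
    }
    agg: Dict[str, dict] = {}
    merge(agg, {make_key(txn): txn})
    merge(agg, {make_key(txn): dict(txn)})  # second request in the same daily bucket
    key = make_key(txn)
    assert agg[key]["api_requests"] == 2
    assert agg[key]["prompt_tokens"] == 20
    print(agg[key])
```

Because the merge is field-wise addition, it is associative: the same reduction can run once in memory per pod and again over the batches drained from `REDIS_DAILY_TEAM_SPEND_UPDATE_BUFFER_KEY` without double counting, which is presumably what allows multiple pods to share one Redis buffer while only the leader pod commits to the database.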