Merge branch 'BerriAI:main' into NolanTrem/GeminiSystemMessages

Nolan Tremelling 2025-04-15 13:38:12 -07:00 committed by GitHub
commit adc4f161da
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
74 changed files with 2462 additions and 538 deletions

.gitignore vendored

@ -73,6 +73,7 @@ tests/local_testing/log.txt
.codegpt
litellm/proxy/_new_new_secret_config.yaml
litellm/proxy/custom_guardrail.py
.mypy_cache/*
litellm/proxy/_experimental/out/404.html
litellm/proxy/_experimental/out/404.html
litellm/proxy/_experimental/out/model_hub.html


@ -4,7 +4,7 @@ Pass-through endpoints for Cohere - call provider-specific endpoint, in native f
| Feature | Supported | Notes |
|-------|-------|-------|
| Cost Tracking | ❌ | [Tell us if you need this](https://github.com/BerriAI/litellm/issues/new) |
| Cost Tracking | ✅ | works across all integrations |
| Logging | ✅ | works across all integrations |
| End-user Tracking | ❌ | [Tell us if you need this](https://github.com/BerriAI/litellm/issues/new) |
| Streaming | ✅ | |


@ -323,6 +323,9 @@ router_settings:
| AZURE_AUTHORITY_HOST | Azure authority host URL
| AZURE_CLIENT_ID | Client ID for Azure services
| AZURE_CLIENT_SECRET | Client secret for Azure services
| AZURE_TENANT_ID | Tenant ID for Azure Active Directory
| AZURE_USERNAME | Username for Azure services; used together with AZURE_PASSWORD to obtain an Azure AD token via the basic username/password workflow
| AZURE_PASSWORD | Password for Azure services; used together with AZURE_USERNAME to obtain an Azure AD token via the basic username/password workflow
| AZURE_FEDERATED_TOKEN_FILE | File path to Azure federated token
| AZURE_KEY_VAULT_URI | URI for Azure Key Vault
| AZURE_STORAGE_ACCOUNT_KEY | The Azure Storage Account Key to use for Authentication to Azure Blob Storage logging
@ -331,7 +334,7 @@ router_settings:
| AZURE_STORAGE_TENANT_ID | The Application Tenant ID to use for Authentication to Azure Blob Storage logging
| AZURE_STORAGE_CLIENT_ID | The Application Client ID to use for Authentication to Azure Blob Storage logging
| AZURE_STORAGE_CLIENT_SECRET | The Application Client Secret to use for Authentication to Azure Blob Storage logging
| AZURE_TENANT_ID | Tenant ID for Azure Active Directory
| BERRISPEND_ACCOUNT_ID | Account ID for BerriSpend service
| BRAINTRUST_API_KEY | API key for Braintrust integration
| CIRCLE_OIDC_TOKEN | OpenID Connect token for CircleCI
@ -433,6 +436,7 @@ router_settings:
| LITERAL_BATCH_SIZE | Batch size for Literal operations
| LITELLM_DONT_SHOW_FEEDBACK_BOX | Flag to hide feedback box in LiteLLM UI
| LITELLM_DROP_PARAMS | Parameters to drop in LiteLLM requests
| LITELLM_MODIFY_PARAMS | Parameters to modify in LiteLLM requests
| LITELLM_EMAIL | Email associated with LiteLLM account
| LITELLM_GLOBAL_MAX_PARALLEL_REQUEST_RETRIES | Maximum retries for parallel requests in LiteLLM
| LITELLM_GLOBAL_MAX_PARALLEL_REQUEST_RETRY_TIMEOUT | Timeout for retries of parallel requests in LiteLLM
@ -446,6 +450,8 @@ router_settings:
| LITELLM_TOKEN | Access token for LiteLLM integration
| LITELLM_PRINT_STANDARD_LOGGING_PAYLOAD | If true, prints the standard logging payload to the console - useful for debugging
| LOGFIRE_TOKEN | Token for Logfire logging service
| MISTRAL_API_BASE | Base URL for Mistral API
| MISTRAL_API_KEY | API key for Mistral API
| MICROSOFT_CLIENT_ID | Client ID for Microsoft services
| MICROSOFT_CLIENT_SECRET | Client secret for Microsoft services
| MICROSOFT_TENANT | Tenant ID for Microsoft Azure
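
A minimal sketch of how the environment variables added in this hunk are consumed, based on other changes in this commit (LITELLM_MODIFY_PARAMS is read at import time in litellm/__init__.py, and the AZURE_USERNAME/AZURE_PASSWORD values are read as fallbacks in the Azure auth changes later in this diff); the values shown are placeholders:

import os

# Read via bool(os.getenv("LITELLM_MODIFY_PARAMS", False)) at import time,
# so any non-empty string enables it.
os.environ["LITELLM_MODIFY_PARAMS"] = "True"

# Fallbacks for the Azure AD username/password token workflow (placeholders).
os.environ["AZURE_USERNAME"] = "user@example.com"
os.environ["AZURE_PASSWORD"] = "example-password"

import litellm  # imported after the env vars are set, so the flags are picked up
assert litellm.modify_params is True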


@ -188,7 +188,7 @@ const sidebars = {
"providers/azure_ai",
"providers/aiml",
"providers/vertex",
{
type: "category",
label: "Google AI Studio",
@ -340,7 +340,7 @@ const sidebars = {
},
"rerank",
"assistants",
{
type: "category",
label: "/files",
@ -407,9 +407,9 @@ const sidebars = {
type: "category",
label: "Logging & Observability",
items: [
"observability/langfuse_integration",
"observability/lunary_integration",
"observability/mlflow",
"observability/langfuse_integration",
"observability/gcs_bucket_integration",
"observability/langsmith_integration",
"observability/literalai_integration",


@ -113,6 +113,7 @@ _custom_logger_compatible_callbacks_literal = Literal[
"pagerduty",
"humanloop",
"gcs_pubsub",
"anthropic_cache_control_hook",
]
logged_real_time_event_types: Optional[Union[List[str], Literal["*"]]] = None
_known_custom_logger_compatible_callbacks: List = list(
@ -127,19 +128,19 @@ prometheus_initialize_budget_metrics: Optional[bool] = False
require_auth_for_metrics_endpoint: Optional[bool] = False
argilla_batch_size: Optional[int] = None
datadog_use_v1: Optional[bool] = False # if you want to use v1 datadog logged payload
gcs_pub_sub_use_v1: Optional[bool] = (
False # if you want to use v1 gcs pubsub logged payload
)
gcs_pub_sub_use_v1: Optional[
bool
] = False # if you want to use v1 gcs pubsub logged payload
argilla_transformation_object: Optional[Dict[str, Any]] = None
_async_input_callback: List[Union[str, Callable, CustomLogger]] = (
[]
) # internal variable - async custom callbacks are routed here.
_async_success_callback: List[Union[str, Callable, CustomLogger]] = (
[]
) # internal variable - async custom callbacks are routed here.
_async_failure_callback: List[Union[str, Callable, CustomLogger]] = (
[]
) # internal variable - async custom callbacks are routed here.
_async_input_callback: List[
Union[str, Callable, CustomLogger]
] = [] # internal variable - async custom callbacks are routed here.
_async_success_callback: List[
Union[str, Callable, CustomLogger]
] = [] # internal variable - async custom callbacks are routed here.
_async_failure_callback: List[
Union[str, Callable, CustomLogger]
] = [] # internal variable - async custom callbacks are routed here.
pre_call_rules: List[Callable] = []
post_call_rules: List[Callable] = []
turn_off_message_logging: Optional[bool] = False
@ -147,22 +148,22 @@ log_raw_request_response: bool = False
redact_messages_in_exceptions: Optional[bool] = False
redact_user_api_key_info: Optional[bool] = False
filter_invalid_headers: Optional[bool] = False
add_user_information_to_llm_headers: Optional[bool] = (
None # adds user_id, team_id, token hash (params from StandardLoggingMetadata) to request headers
)
add_user_information_to_llm_headers: Optional[
bool
] = None # adds user_id, team_id, token hash (params from StandardLoggingMetadata) to request headers
store_audit_logs = False # Enterprise feature, allow users to see audit logs
### end of callbacks #############
email: Optional[str] = (
None # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
)
token: Optional[str] = (
None # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
)
email: Optional[
str
] = None # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
token: Optional[
str
] = None # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
telemetry = True
max_tokens: int = DEFAULT_MAX_TOKENS # OpenAI Defaults
drop_params = bool(os.getenv("LITELLM_DROP_PARAMS", False))
modify_params = False
modify_params = bool(os.getenv("LITELLM_MODIFY_PARAMS", False))
retry = True
### AUTH ###
api_key: Optional[str] = None
@ -234,24 +235,20 @@ enable_loadbalancing_on_batch_endpoints: Optional[bool] = None
enable_caching_on_provider_specific_optional_params: bool = (
False # feature-flag for caching on optional params - e.g. 'top_k'
)
caching: bool = (
False # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
)
caching_with_models: bool = (
False # # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
)
cache: Optional[Cache] = (
None # cache object <- use this - https://docs.litellm.ai/docs/caching
)
caching: bool = False # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
caching_with_models: bool = False # # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
cache: Optional[
Cache
] = None # cache object <- use this - https://docs.litellm.ai/docs/caching
default_in_memory_ttl: Optional[float] = None
default_redis_ttl: Optional[float] = None
default_redis_batch_cache_expiry: Optional[float] = None
model_alias_map: Dict[str, str] = {}
model_group_alias_map: Dict[str, str] = {}
max_budget: float = 0.0 # set the max budget across all providers
budget_duration: Optional[str] = (
None # proxy only - resets budget after fixed duration. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d").
)
budget_duration: Optional[
str
] = None # proxy only - resets budget after fixed duration. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d").
default_soft_budget: float = (
DEFAULT_SOFT_BUDGET # by default all litellm proxy keys have a soft budget of 50.0
)
@ -260,15 +257,11 @@ forward_traceparent_to_llm_provider: bool = False
_current_cost = 0.0 # private variable, used if max budget is set
error_logs: Dict = {}
add_function_to_prompt: bool = (
False # if function calling not supported by api, append function call details to system prompt
)
add_function_to_prompt: bool = False # if function calling not supported by api, append function call details to system prompt
client_session: Optional[httpx.Client] = None
aclient_session: Optional[httpx.AsyncClient] = None
model_fallbacks: Optional[List] = None # Deprecated for 'litellm.fallbacks'
model_cost_map_url: str = (
"https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"
)
model_cost_map_url: str = "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"
suppress_debug_info = False
dynamodb_table_name: Optional[str] = None
s3_callback_params: Optional[Dict] = None
@ -291,9 +284,7 @@ disable_end_user_cost_tracking_prometheus_only: Optional[bool] = None
custom_prometheus_metadata_labels: List[str] = []
#### REQUEST PRIORITIZATION ####
priority_reservation: Optional[Dict[str, float]] = None
force_ipv4: bool = (
False # when True, litellm will force ipv4 for all LLM requests. Some users have seen httpx ConnectionError when using ipv6.
)
force_ipv4: bool = False # when True, litellm will force ipv4 for all LLM requests. Some users have seen httpx ConnectionError when using ipv6.
module_level_aclient = AsyncHTTPHandler(
timeout=request_timeout, client_alias="module level aclient"
)
@ -307,13 +298,13 @@ fallbacks: Optional[List] = None
context_window_fallbacks: Optional[List] = None
content_policy_fallbacks: Optional[List] = None
allowed_fails: int = 3
num_retries_per_request: Optional[int] = (
None # for the request overall (incl. fallbacks + model retries)
)
num_retries_per_request: Optional[
int
] = None # for the request overall (incl. fallbacks + model retries)
####### SECRET MANAGERS #####################
secret_manager_client: Optional[Any] = (
None # list of instantiated key management clients - e.g. azure kv, infisical, etc.
)
secret_manager_client: Optional[
Any
] = None # list of instantiated key management clients - e.g. azure kv, infisical, etc.
_google_kms_resource_name: Optional[str] = None
_key_management_system: Optional[KeyManagementSystem] = None
_key_management_settings: KeyManagementSettings = KeyManagementSettings()
@ -324,6 +315,7 @@ from litellm.litellm_core_utils.get_model_cost_map import get_model_cost_map
model_cost = get_model_cost_map(url=model_cost_map_url)
custom_prompt_dict: Dict[str, dict] = {}
check_provider_endpoint = False
####### THREAD-SPECIFIC DATA ####################
@ -1063,10 +1055,10 @@ from .types.llms.custom_llm import CustomLLMItem
from .types.utils import GenericStreamingChunk
custom_provider_map: List[CustomLLMItem] = []
_custom_providers: List[str] = (
[]
) # internal helper util, used to track names of custom providers
disable_hf_tokenizer_download: Optional[bool] = (
None # disable huggingface tokenizer download. Defaults to openai clk100
)
_custom_providers: List[
str
] = [] # internal helper util, used to track names of custom providers
disable_hf_tokenizer_download: Optional[
bool
] = None # disable huggingface tokenizer download. Defaults to openai clk100
global_disable_no_log_param: bool = False
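
The inline comments in this hunk document the proxy budget knobs; a short hedged illustration of the values they describe (the numbers are arbitrary):

import litellm

litellm.max_budget = 100.0       # max spend across all providers
litellm.budget_duration = "30d"  # proxy only: budget resets after 30 days ("30s", "30m", "30h" also valid)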


@ -0,0 +1,150 @@
"""
This hook is used to inject cache control directives into the messages of a chat completion.
Users can define
- `cache_control_injection_points` in the completion params and litellm will inject the cache control directives into the messages at the specified injection points.
"""
import copy
from typing import Dict, List, Optional, Tuple, Union, cast
from litellm.integrations.custom_logger import CustomLogger
from litellm.integrations.custom_prompt_management import CustomPromptManagement
from litellm.types.integrations.anthropic_cache_control_hook import (
CacheControlInjectionPoint,
CacheControlMessageInjectionPoint,
)
from litellm.types.llms.openai import AllMessageValues, ChatCompletionCachedContent
from litellm.types.utils import StandardCallbackDynamicParams
class AnthropicCacheControlHook(CustomPromptManagement):
def get_chat_completion_prompt(
self,
model: str,
messages: List[AllMessageValues],
non_default_params: dict,
prompt_id: Optional[str],
prompt_variables: Optional[dict],
dynamic_callback_params: StandardCallbackDynamicParams,
) -> Tuple[str, List[AllMessageValues], dict]:
"""
Apply cache control directives based on specified injection points.
Returns:
- model: str - the model to use
- messages: List[AllMessageValues] - messages with applied cache controls
- non_default_params: dict - params with any global cache controls
"""
# Extract cache control injection points
injection_points: List[CacheControlInjectionPoint] = non_default_params.pop(
"cache_control_injection_points", []
)
if not injection_points:
return model, messages, non_default_params
# Create a deep copy of messages to avoid modifying the original list
processed_messages = copy.deepcopy(messages)
# Process message-level cache controls
for point in injection_points:
if point.get("location") == "message":
point = cast(CacheControlMessageInjectionPoint, point)
processed_messages = self._process_message_injection(
point=point, messages=processed_messages
)
return model, processed_messages, non_default_params
@staticmethod
def _process_message_injection(
point: CacheControlMessageInjectionPoint, messages: List[AllMessageValues]
) -> List[AllMessageValues]:
"""Process message-level cache control injection."""
control: ChatCompletionCachedContent = point.get(
"control", None
) or ChatCompletionCachedContent(type="ephemeral")
_targetted_index: Optional[Union[int, str]] = point.get("index", None)
targetted_index: Optional[int] = None
if isinstance(_targetted_index, str):
if _targetted_index.isdigit():
targetted_index = int(_targetted_index)
else:
targetted_index = _targetted_index
targetted_role = point.get("role", None)
# Case 1: Target by specific index
if targetted_index is not None:
if 0 <= targetted_index < len(messages):
messages[targetted_index] = (
AnthropicCacheControlHook._safe_insert_cache_control_in_message(
messages[targetted_index], control
)
)
# Case 2: Target by role
elif targetted_role is not None:
for msg in messages:
if msg.get("role") == targetted_role:
msg = (
AnthropicCacheControlHook._safe_insert_cache_control_in_message(
message=msg, control=control
)
)
return messages
@staticmethod
def _safe_insert_cache_control_in_message(
message: AllMessageValues, control: ChatCompletionCachedContent
) -> AllMessageValues:
"""
Safe way to insert cache control in a message
OpenAI Message content can be either:
- string
- list of objects
This method handles inserting cache control in both cases.
"""
message_content = message.get("content", None)
# 1. if string, insert cache control in the message
if isinstance(message_content, str):
message["cache_control"] = control # type: ignore
# 2. list of objects
elif isinstance(message_content, list):
for content_item in message_content:
if isinstance(content_item, dict):
content_item["cache_control"] = control # type: ignore
return message
@property
def integration_name(self) -> str:
"""Return the integration name for this hook."""
return "anthropic_cache_control_hook"
@staticmethod
def should_use_anthropic_cache_control_hook(non_default_params: Dict) -> bool:
if non_default_params.get("cache_control_injection_points", None):
return True
return False
@staticmethod
def get_custom_logger_for_anthropic_cache_control_hook(
non_default_params: Dict,
) -> Optional[CustomLogger]:
from litellm.litellm_core_utils.litellm_logging import (
_init_custom_logger_compatible_class,
)
if AnthropicCacheControlHook.should_use_anthropic_cache_control_hook(
non_default_params
):
return _init_custom_logger_compatible_class(
logging_integration="anthropic_cache_control_hook",
internal_usage_cache=None,
llm_router=None,
)
return None
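
The module docstring above describes the `cache_control_injection_points` completion param that this new hook consumes. A hedged usage sketch; the model name and message contents are placeholders:

import litellm

response = litellm.completion(
    model="anthropic/claude-3-5-sonnet-20240620",
    messages=[
        {"role": "system", "content": "You are a helpful assistant with a very long system prompt."},
        {"role": "user", "content": "Hello!"},
    ],
    # Handled by AnthropicCacheControlHook: marks every system message with
    # {"cache_control": {"type": "ephemeral"}} before the request is sent.
    cache_control_injection_points=[
        {"location": "message", "role": "system"},
    ],
)

An injection point may target messages by "role" (as above) or by "index", and may carry an explicit "control" object; when "control" is omitted, `_process_message_injection` defaults to {"type": "ephemeral"}.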


@ -94,7 +94,7 @@ class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callbac
model: str,
messages: List[AllMessageValues],
non_default_params: dict,
prompt_id: str,
prompt_id: Optional[str],
prompt_variables: Optional[dict],
dynamic_callback_params: StandardCallbackDynamicParams,
) -> Tuple[str, List[AllMessageValues], dict]:


@ -15,7 +15,7 @@ class CustomPromptManagement(CustomLogger, PromptManagementBase):
model: str,
messages: List[AllMessageValues],
non_default_params: dict,
prompt_id: str,
prompt_id: Optional[str],
prompt_variables: Optional[dict],
dynamic_callback_params: StandardCallbackDynamicParams,
) -> Tuple[str, List[AllMessageValues], dict]:


@ -152,14 +152,21 @@ class HumanloopLogger(CustomLogger):
model: str,
messages: List[AllMessageValues],
non_default_params: dict,
prompt_id: str,
prompt_id: Optional[str],
prompt_variables: Optional[dict],
dynamic_callback_params: StandardCallbackDynamicParams,
) -> Tuple[str, List[AllMessageValues], dict,]:
) -> Tuple[
str,
List[AllMessageValues],
dict,
]:
humanloop_api_key = dynamic_callback_params.get(
"humanloop_api_key"
) or get_secret_str("HUMANLOOP_API_KEY")
if prompt_id is None:
raise ValueError("prompt_id is required for Humanloop integration")
if humanloop_api_key is None:
return super().get_chat_completion_prompt(
model=model,


@ -169,10 +169,14 @@ class LangfusePromptManagement(LangFuseLogger, PromptManagementBase, CustomLogge
model: str,
messages: List[AllMessageValues],
non_default_params: dict,
prompt_id: str,
prompt_id: Optional[str],
prompt_variables: Optional[dict],
dynamic_callback_params: StandardCallbackDynamicParams,
) -> Tuple[str, List[AllMessageValues], dict,]:
) -> Tuple[
str,
List[AllMessageValues],
dict,
]:
return self.get_chat_completion_prompt(
model,
messages,


@ -79,10 +79,12 @@ class PromptManagementBase(ABC):
model: str,
messages: List[AllMessageValues],
non_default_params: dict,
prompt_id: str,
prompt_id: Optional[str],
prompt_variables: Optional[dict],
dynamic_callback_params: StandardCallbackDynamicParams,
) -> Tuple[str, List[AllMessageValues], dict,]:
) -> Tuple[str, List[AllMessageValues], dict]:
if prompt_id is None:
raise ValueError("prompt_id is required for Prompt Management Base class")
if not self.should_run_prompt_management(
prompt_id=prompt_id, dynamic_callback_params=dynamic_callback_params
):


@ -36,6 +36,7 @@ from litellm.cost_calculator import (
RealtimeAPITokenUsageProcessor,
_select_model_name_for_cost_calc,
)
from litellm.integrations.anthropic_cache_control_hook import AnthropicCacheControlHook
from litellm.integrations.arize.arize import ArizeLogger
from litellm.integrations.custom_guardrail import CustomGuardrail
from litellm.integrations.custom_logger import CustomLogger
@ -249,9 +250,9 @@ class Logging(LiteLLMLoggingBaseClass):
self.litellm_trace_id = litellm_trace_id
self.function_id = function_id
self.streaming_chunks: List[Any] = [] # for generating complete stream response
self.sync_streaming_chunks: List[
Any
] = [] # for generating complete stream response
self.sync_streaming_chunks: List[Any] = (
[]
) # for generating complete stream response
self.log_raw_request_response = log_raw_request_response
# Initialize dynamic callbacks
@ -455,19 +456,38 @@ class Logging(LiteLLMLoggingBaseClass):
if "custom_llm_provider" in self.model_call_details:
self.custom_llm_provider = self.model_call_details["custom_llm_provider"]
def should_run_prompt_management_hooks(
self,
non_default_params: Dict,
prompt_id: Optional[str] = None,
) -> bool:
"""
Return True if prompt management hooks should be run
"""
if prompt_id:
return True
if AnthropicCacheControlHook.should_use_anthropic_cache_control_hook(
non_default_params
):
return True
return False
def get_chat_completion_prompt(
self,
model: str,
messages: List[AllMessageValues],
non_default_params: dict,
prompt_id: str,
non_default_params: Dict,
prompt_id: Optional[str],
prompt_variables: Optional[dict],
prompt_management_logger: Optional[CustomLogger] = None,
) -> Tuple[str, List[AllMessageValues], dict]:
custom_logger = (
prompt_management_logger
or self.get_custom_logger_for_prompt_management(model)
or self.get_custom_logger_for_prompt_management(
model=model, non_default_params=non_default_params
)
)
if custom_logger:
(
model,
@ -476,7 +496,7 @@ class Logging(LiteLLMLoggingBaseClass):
) = custom_logger.get_chat_completion_prompt(
model=model,
messages=messages,
non_default_params=non_default_params,
non_default_params=non_default_params or {},
prompt_id=prompt_id,
prompt_variables=prompt_variables,
dynamic_callback_params=self.standard_callback_dynamic_params,
@ -485,7 +505,7 @@ class Logging(LiteLLMLoggingBaseClass):
return model, messages, non_default_params
def get_custom_logger_for_prompt_management(
self, model: str
self, model: str, non_default_params: Dict
) -> Optional[CustomLogger]:
"""
Get a custom logger for prompt management based on model name or available callbacks.
@ -520,6 +540,26 @@ class Logging(LiteLLMLoggingBaseClass):
self.model_call_details["prompt_integration"] = logger.__class__.__name__
return logger
if anthropic_cache_control_logger := AnthropicCacheControlHook.get_custom_logger_for_anthropic_cache_control_hook(
non_default_params
):
self.model_call_details["prompt_integration"] = (
anthropic_cache_control_logger.__class__.__name__
)
return anthropic_cache_control_logger
return None
def get_custom_logger_for_anthropic_cache_control_hook(
self, non_default_params: Dict
) -> Optional[CustomLogger]:
if non_default_params.get("cache_control_injection_points", None):
custom_logger = _init_custom_logger_compatible_class(
logging_integration="anthropic_cache_control_hook",
internal_usage_cache=None,
llm_router=None,
)
return custom_logger
return None
def _get_raw_request_body(self, data: Optional[Union[dict, str]]) -> dict:
@ -557,9 +597,9 @@ class Logging(LiteLLMLoggingBaseClass):
model
): # if model name was changes pre-call, overwrite the initial model call name with the new one
self.model_call_details["model"] = model
self.model_call_details["litellm_params"][
"api_base"
] = self._get_masked_api_base(additional_args.get("api_base", ""))
self.model_call_details["litellm_params"]["api_base"] = (
self._get_masked_api_base(additional_args.get("api_base", ""))
)
def pre_call(self, input, api_key, model=None, additional_args={}): # noqa: PLR0915
# Log the exact input to the LLM API
@ -588,10 +628,10 @@ class Logging(LiteLLMLoggingBaseClass):
try:
# [Non-blocking Extra Debug Information in metadata]
if turn_off_message_logging is True:
_metadata[
"raw_request"
] = "redacted by litellm. \
_metadata["raw_request"] = (
"redacted by litellm. \
'litellm.turn_off_message_logging=True'"
)
else:
curl_command = self._get_request_curl_command(
api_base=additional_args.get("api_base", ""),
@ -602,32 +642,32 @@ class Logging(LiteLLMLoggingBaseClass):
_metadata["raw_request"] = str(curl_command)
# split up, so it's easier to parse in the UI
self.model_call_details[
"raw_request_typed_dict"
] = RawRequestTypedDict(
raw_request_api_base=str(
additional_args.get("api_base") or ""
),
raw_request_body=self._get_raw_request_body(
additional_args.get("complete_input_dict", {})
),
raw_request_headers=self._get_masked_headers(
additional_args.get("headers", {}) or {},
ignore_sensitive_headers=True,
),
error=None,
self.model_call_details["raw_request_typed_dict"] = (
RawRequestTypedDict(
raw_request_api_base=str(
additional_args.get("api_base") or ""
),
raw_request_body=self._get_raw_request_body(
additional_args.get("complete_input_dict", {})
),
raw_request_headers=self._get_masked_headers(
additional_args.get("headers", {}) or {},
ignore_sensitive_headers=True,
),
error=None,
)
)
except Exception as e:
self.model_call_details[
"raw_request_typed_dict"
] = RawRequestTypedDict(
error=str(e),
self.model_call_details["raw_request_typed_dict"] = (
RawRequestTypedDict(
error=str(e),
)
)
_metadata[
"raw_request"
] = "Unable to Log \
_metadata["raw_request"] = (
"Unable to Log \
raw request: {}".format(
str(e)
str(e)
)
)
if self.logger_fn and callable(self.logger_fn):
try:
@ -957,9 +997,9 @@ class Logging(LiteLLMLoggingBaseClass):
verbose_logger.debug(
f"response_cost_failure_debug_information: {debug_info}"
)
self.model_call_details[
"response_cost_failure_debug_information"
] = debug_info
self.model_call_details["response_cost_failure_debug_information"] = (
debug_info
)
return None
try:
@ -984,9 +1024,9 @@ class Logging(LiteLLMLoggingBaseClass):
verbose_logger.debug(
f"response_cost_failure_debug_information: {debug_info}"
)
self.model_call_details[
"response_cost_failure_debug_information"
] = debug_info
self.model_call_details["response_cost_failure_debug_information"] = (
debug_info
)
return None
@ -1046,9 +1086,9 @@ class Logging(LiteLLMLoggingBaseClass):
end_time = datetime.datetime.now()
if self.completion_start_time is None:
self.completion_start_time = end_time
self.model_call_details[
"completion_start_time"
] = self.completion_start_time
self.model_call_details["completion_start_time"] = (
self.completion_start_time
)
self.model_call_details["log_event_type"] = "successful_api_call"
self.model_call_details["end_time"] = end_time
self.model_call_details["cache_hit"] = cache_hit
@ -1127,39 +1167,39 @@ class Logging(LiteLLMLoggingBaseClass):
"response_cost"
]
else:
self.model_call_details[
"response_cost"
] = self._response_cost_calculator(result=logging_result)
self.model_call_details["response_cost"] = (
self._response_cost_calculator(result=logging_result)
)
## STANDARDIZED LOGGING PAYLOAD
self.model_call_details[
"standard_logging_object"
] = get_standard_logging_object_payload(
kwargs=self.model_call_details,
init_response_obj=logging_result,
start_time=start_time,
end_time=end_time,
logging_obj=self,
status="success",
standard_built_in_tools_params=self.standard_built_in_tools_params,
self.model_call_details["standard_logging_object"] = (
get_standard_logging_object_payload(
kwargs=self.model_call_details,
init_response_obj=logging_result,
start_time=start_time,
end_time=end_time,
logging_obj=self,
status="success",
standard_built_in_tools_params=self.standard_built_in_tools_params,
)
)
elif isinstance(result, dict) or isinstance(result, list):
## STANDARDIZED LOGGING PAYLOAD
self.model_call_details[
"standard_logging_object"
] = get_standard_logging_object_payload(
kwargs=self.model_call_details,
init_response_obj=result,
start_time=start_time,
end_time=end_time,
logging_obj=self,
status="success",
standard_built_in_tools_params=self.standard_built_in_tools_params,
self.model_call_details["standard_logging_object"] = (
get_standard_logging_object_payload(
kwargs=self.model_call_details,
init_response_obj=result,
start_time=start_time,
end_time=end_time,
logging_obj=self,
status="success",
standard_built_in_tools_params=self.standard_built_in_tools_params,
)
)
elif standard_logging_object is not None:
self.model_call_details[
"standard_logging_object"
] = standard_logging_object
self.model_call_details["standard_logging_object"] = (
standard_logging_object
)
else: # streaming chunks + image gen.
self.model_call_details["response_cost"] = None
@ -1215,23 +1255,23 @@ class Logging(LiteLLMLoggingBaseClass):
verbose_logger.debug(
"Logging Details LiteLLM-Success Call streaming complete"
)
self.model_call_details[
"complete_streaming_response"
] = complete_streaming_response
self.model_call_details[
"response_cost"
] = self._response_cost_calculator(result=complete_streaming_response)
self.model_call_details["complete_streaming_response"] = (
complete_streaming_response
)
self.model_call_details["response_cost"] = (
self._response_cost_calculator(result=complete_streaming_response)
)
## STANDARDIZED LOGGING PAYLOAD
self.model_call_details[
"standard_logging_object"
] = get_standard_logging_object_payload(
kwargs=self.model_call_details,
init_response_obj=complete_streaming_response,
start_time=start_time,
end_time=end_time,
logging_obj=self,
status="success",
standard_built_in_tools_params=self.standard_built_in_tools_params,
self.model_call_details["standard_logging_object"] = (
get_standard_logging_object_payload(
kwargs=self.model_call_details,
init_response_obj=complete_streaming_response,
start_time=start_time,
end_time=end_time,
logging_obj=self,
status="success",
standard_built_in_tools_params=self.standard_built_in_tools_params,
)
)
callbacks = self.get_combined_callback_list(
dynamic_success_callbacks=self.dynamic_success_callbacks,
@ -1580,10 +1620,10 @@ class Logging(LiteLLMLoggingBaseClass):
)
else:
if self.stream and complete_streaming_response:
self.model_call_details[
"complete_response"
] = self.model_call_details.get(
"complete_streaming_response", {}
self.model_call_details["complete_response"] = (
self.model_call_details.get(
"complete_streaming_response", {}
)
)
result = self.model_call_details["complete_response"]
openMeterLogger.log_success_event(
@ -1623,10 +1663,10 @@ class Logging(LiteLLMLoggingBaseClass):
)
else:
if self.stream and complete_streaming_response:
self.model_call_details[
"complete_response"
] = self.model_call_details.get(
"complete_streaming_response", {}
self.model_call_details["complete_response"] = (
self.model_call_details.get(
"complete_streaming_response", {}
)
)
result = self.model_call_details["complete_response"]
@ -1733,9 +1773,9 @@ class Logging(LiteLLMLoggingBaseClass):
if complete_streaming_response is not None:
print_verbose("Async success callbacks: Got a complete streaming response")
self.model_call_details[
"async_complete_streaming_response"
] = complete_streaming_response
self.model_call_details["async_complete_streaming_response"] = (
complete_streaming_response
)
try:
if self.model_call_details.get("cache_hit", False) is True:
self.model_call_details["response_cost"] = 0.0
@ -1745,10 +1785,10 @@ class Logging(LiteLLMLoggingBaseClass):
model_call_details=self.model_call_details
)
# base_model defaults to None if not set on model_info
self.model_call_details[
"response_cost"
] = self._response_cost_calculator(
result=complete_streaming_response
self.model_call_details["response_cost"] = (
self._response_cost_calculator(
result=complete_streaming_response
)
)
verbose_logger.debug(
@ -1761,16 +1801,16 @@ class Logging(LiteLLMLoggingBaseClass):
self.model_call_details["response_cost"] = None
## STANDARDIZED LOGGING PAYLOAD
self.model_call_details[
"standard_logging_object"
] = get_standard_logging_object_payload(
kwargs=self.model_call_details,
init_response_obj=complete_streaming_response,
start_time=start_time,
end_time=end_time,
logging_obj=self,
status="success",
standard_built_in_tools_params=self.standard_built_in_tools_params,
self.model_call_details["standard_logging_object"] = (
get_standard_logging_object_payload(
kwargs=self.model_call_details,
init_response_obj=complete_streaming_response,
start_time=start_time,
end_time=end_time,
logging_obj=self,
status="success",
standard_built_in_tools_params=self.standard_built_in_tools_params,
)
)
callbacks = self.get_combined_callback_list(
dynamic_success_callbacks=self.dynamic_async_success_callbacks,
@ -1976,18 +2016,18 @@ class Logging(LiteLLMLoggingBaseClass):
## STANDARDIZED LOGGING PAYLOAD
self.model_call_details[
"standard_logging_object"
] = get_standard_logging_object_payload(
kwargs=self.model_call_details,
init_response_obj={},
start_time=start_time,
end_time=end_time,
logging_obj=self,
status="failure",
error_str=str(exception),
original_exception=exception,
standard_built_in_tools_params=self.standard_built_in_tools_params,
self.model_call_details["standard_logging_object"] = (
get_standard_logging_object_payload(
kwargs=self.model_call_details,
init_response_obj={},
start_time=start_time,
end_time=end_time,
logging_obj=self,
status="failure",
error_str=str(exception),
original_exception=exception,
standard_built_in_tools_params=self.standard_built_in_tools_params,
)
)
return start_time, end_time
@ -2753,9 +2793,9 @@ def _init_custom_logger_compatible_class( # noqa: PLR0915
endpoint=arize_config.endpoint,
)
os.environ[
"OTEL_EXPORTER_OTLP_TRACES_HEADERS"
] = f"space_key={arize_config.space_key},api_key={arize_config.api_key}"
os.environ["OTEL_EXPORTER_OTLP_TRACES_HEADERS"] = (
f"space_key={arize_config.space_key},api_key={arize_config.api_key}"
)
for callback in _in_memory_loggers:
if (
isinstance(callback, ArizeLogger)
@ -2779,9 +2819,9 @@ def _init_custom_logger_compatible_class( # noqa: PLR0915
# auth can be disabled on local deployments of arize phoenix
if arize_phoenix_config.otlp_auth_headers is not None:
os.environ[
"OTEL_EXPORTER_OTLP_TRACES_HEADERS"
] = arize_phoenix_config.otlp_auth_headers
os.environ["OTEL_EXPORTER_OTLP_TRACES_HEADERS"] = (
arize_phoenix_config.otlp_auth_headers
)
for callback in _in_memory_loggers:
if (
@ -2872,9 +2912,9 @@ def _init_custom_logger_compatible_class( # noqa: PLR0915
exporter="otlp_http",
endpoint="https://langtrace.ai/api/trace",
)
os.environ[
"OTEL_EXPORTER_OTLP_TRACES_HEADERS"
] = f"api_key={os.getenv('LANGTRACE_API_KEY')}"
os.environ["OTEL_EXPORTER_OTLP_TRACES_HEADERS"] = (
f"api_key={os.getenv('LANGTRACE_API_KEY')}"
)
for callback in _in_memory_loggers:
if (
isinstance(callback, OpenTelemetry)
@ -2908,6 +2948,13 @@ def _init_custom_logger_compatible_class( # noqa: PLR0915
pagerduty_logger = PagerDutyAlerting(**custom_logger_init_args)
_in_memory_loggers.append(pagerduty_logger)
return pagerduty_logger # type: ignore
elif logging_integration == "anthropic_cache_control_hook":
for callback in _in_memory_loggers:
if isinstance(callback, AnthropicCacheControlHook):
return callback
anthropic_cache_control_hook = AnthropicCacheControlHook()
_in_memory_loggers.append(anthropic_cache_control_hook)
return anthropic_cache_control_hook # type: ignore
elif logging_integration == "gcs_pubsub":
for callback in _in_memory_loggers:
if isinstance(callback, GcsPubSubLogger):
@ -3046,6 +3093,10 @@ def get_custom_logger_compatible_class( # noqa: PLR0915
for callback in _in_memory_loggers:
if isinstance(callback, PagerDutyAlerting):
return callback
elif logging_integration == "anthropic_cache_control_hook":
for callback in _in_memory_loggers:
if isinstance(callback, AnthropicCacheControlHook):
return callback
elif logging_integration == "gcs_pubsub":
for callback in _in_memory_loggers:
if isinstance(callback, GcsPubSubLogger):
@ -3369,10 +3420,10 @@ class StandardLoggingPayloadSetup:
for key in StandardLoggingHiddenParams.__annotations__.keys():
if key in hidden_params:
if key == "additional_headers":
clean_hidden_params[
"additional_headers"
] = StandardLoggingPayloadSetup.get_additional_headers(
hidden_params[key]
clean_hidden_params["additional_headers"] = (
StandardLoggingPayloadSetup.get_additional_headers(
hidden_params[key]
)
)
else:
clean_hidden_params[key] = hidden_params[key] # type: ignore
@ -3651,7 +3702,7 @@ def emit_standard_logging_payload(payload: StandardLoggingPayload):
def get_standard_logging_metadata(
metadata: Optional[Dict[str, Any]]
metadata: Optional[Dict[str, Any]],
) -> StandardLoggingMetadata:
"""
Clean and filter the metadata dictionary to include only the specified keys in StandardLoggingMetadata.
@ -3715,9 +3766,9 @@ def scrub_sensitive_keys_in_metadata(litellm_params: Optional[dict]):
):
for k, v in metadata["user_api_key_metadata"].items():
if k == "logging": # prevent logging user logging keys
cleaned_user_api_key_metadata[
k
] = "scrubbed_by_litellm_for_sensitive_keys"
cleaned_user_api_key_metadata[k] = (
"scrubbed_by_litellm_for_sensitive_keys"
)
else:
cleaned_user_api_key_metadata[k] = v


@ -44,7 +44,7 @@ from litellm.utils import (
token_counter,
)
from ..common_utils import AnthropicError, process_anthropic_headers
from ..common_utils import AnthropicError, AnthropicModelInfo, process_anthropic_headers
if TYPE_CHECKING:
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
@ -54,7 +54,7 @@ else:
LoggingClass = Any
class AnthropicConfig(BaseConfig):
class AnthropicConfig(AnthropicModelInfo, BaseConfig):
"""
Reference: https://docs.anthropic.com/claude/reference/messages_post
@ -127,41 +127,6 @@ class AnthropicConfig(BaseConfig):
"anthropic-beta": "prompt-caching-2024-07-31",
}
def get_anthropic_headers(
self,
api_key: str,
anthropic_version: Optional[str] = None,
computer_tool_used: bool = False,
prompt_caching_set: bool = False,
pdf_used: bool = False,
is_vertex_request: bool = False,
user_anthropic_beta_headers: Optional[List[str]] = None,
) -> dict:
betas = set()
if prompt_caching_set:
betas.add("prompt-caching-2024-07-31")
if computer_tool_used:
betas.add("computer-use-2024-10-22")
if pdf_used:
betas.add("pdfs-2024-09-25")
headers = {
"anthropic-version": anthropic_version or "2023-06-01",
"x-api-key": api_key,
"accept": "application/json",
"content-type": "application/json",
}
if user_anthropic_beta_headers is not None:
betas.update(user_anthropic_beta_headers)
# Don't send any beta headers to Vertex, Vertex has failed requests when they are sent
if is_vertex_request is True:
pass
elif len(betas) > 0:
headers["anthropic-beta"] = ",".join(betas)
return headers
def _map_tool_choice(
self, tool_choice: Optional[str], parallel_tool_use: Optional[bool]
) -> Optional[AnthropicMessagesToolChoice]:
@ -446,49 +411,6 @@ class AnthropicConfig(BaseConfig):
)
return _tool
def is_cache_control_set(self, messages: List[AllMessageValues]) -> bool:
"""
Return if {"cache_control": ..} in message content block
Used to check if anthropic prompt caching headers need to be set.
"""
for message in messages:
if message.get("cache_control", None) is not None:
return True
_message_content = message.get("content")
if _message_content is not None and isinstance(_message_content, list):
for content in _message_content:
if "cache_control" in content:
return True
return False
def is_computer_tool_used(
self, tools: Optional[List[AllAnthropicToolsValues]]
) -> bool:
if tools is None:
return False
for tool in tools:
if "type" in tool and tool["type"].startswith("computer_"):
return True
return False
def is_pdf_used(self, messages: List[AllMessageValues]) -> bool:
"""
Set to true if media passed into messages.
"""
for message in messages:
if (
"content" in message
and message["content"] is not None
and isinstance(message["content"], list)
):
for content in message["content"]:
if "type" in content and content["type"] != "text":
return True
return False
def translate_system_message(
self, messages: List[AllMessageValues]
) -> List[AnthropicSystemMessageContent]:
@ -862,47 +784,3 @@ class AnthropicConfig(BaseConfig):
message=error_message,
headers=cast(httpx.Headers, headers),
)
def _get_user_anthropic_beta_headers(
self, anthropic_beta_header: Optional[str]
) -> Optional[List[str]]:
if anthropic_beta_header is None:
return None
return anthropic_beta_header.split(",")
def validate_environment(
self,
headers: dict,
model: str,
messages: List[AllMessageValues],
optional_params: dict,
litellm_params: dict,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
) -> Dict:
if api_key is None:
raise litellm.AuthenticationError(
message="Missing Anthropic API Key - A call is being made to anthropic but no key is set either in the environment variables or via params. Please set `ANTHROPIC_API_KEY` in your environment vars",
llm_provider="anthropic",
model=model,
)
tools = optional_params.get("tools")
prompt_caching_set = self.is_cache_control_set(messages=messages)
computer_tool_used = self.is_computer_tool_used(tools=tools)
pdf_used = self.is_pdf_used(messages=messages)
user_anthropic_beta_headers = self._get_user_anthropic_beta_headers(
anthropic_beta_header=headers.get("anthropic-beta")
)
anthropic_headers = self.get_anthropic_headers(
computer_tool_used=computer_tool_used,
prompt_caching_set=prompt_caching_set,
pdf_used=pdf_used,
api_key=api_key,
is_vertex_request=optional_params.get("is_vertex_request", False),
user_anthropic_beta_headers=user_anthropic_beta_headers,
)
headers = {**headers, **anthropic_headers}
return headers
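
A hedged sketch of the beta-header assembly that this hunk removes from AnthropicConfig and the following anthropic common-utils hunk re-homes in AnthropicModelInfo; AnthropicConfig still exposes it through inheritance, and the API key below is a placeholder:

import litellm

headers = litellm.AnthropicConfig().get_anthropic_headers(
    api_key="sk-ant-placeholder",
    prompt_caching_set=True,
    computer_tool_used=True,
)
# headers["anthropic-beta"] contains "prompt-caching-2024-07-31" and
# "computer-use-2024-10-22" (join order is unspecified because betas is a set);
# no beta header is attached when is_vertex_request=True.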


@ -2,7 +2,7 @@
This file contains common utils for anthropic calls.
"""
from typing import List, Optional, Union
from typing import Dict, List, Optional, Union
import httpx
@ -10,6 +10,8 @@ import litellm
from litellm.llms.base_llm.base_utils import BaseLLMModelInfo
from litellm.llms.base_llm.chat.transformation import BaseLLMException
from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.anthropic import AllAnthropicToolsValues
from litellm.types.llms.openai import AllMessageValues
class AnthropicError(BaseLLMException):
@ -23,6 +25,128 @@ class AnthropicError(BaseLLMException):
class AnthropicModelInfo(BaseLLMModelInfo):
def is_cache_control_set(self, messages: List[AllMessageValues]) -> bool:
"""
Return if {"cache_control": ..} in message content block
Used to check if anthropic prompt caching headers need to be set.
"""
for message in messages:
if message.get("cache_control", None) is not None:
return True
_message_content = message.get("content")
if _message_content is not None and isinstance(_message_content, list):
for content in _message_content:
if "cache_control" in content:
return True
return False
def is_computer_tool_used(
self, tools: Optional[List[AllAnthropicToolsValues]]
) -> bool:
if tools is None:
return False
for tool in tools:
if "type" in tool and tool["type"].startswith("computer_"):
return True
return False
def is_pdf_used(self, messages: List[AllMessageValues]) -> bool:
"""
Set to true if media passed into messages.
"""
for message in messages:
if (
"content" in message
and message["content"] is not None
and isinstance(message["content"], list)
):
for content in message["content"]:
if "type" in content and content["type"] != "text":
return True
return False
def _get_user_anthropic_beta_headers(
self, anthropic_beta_header: Optional[str]
) -> Optional[List[str]]:
if anthropic_beta_header is None:
return None
return anthropic_beta_header.split(",")
def get_anthropic_headers(
self,
api_key: str,
anthropic_version: Optional[str] = None,
computer_tool_used: bool = False,
prompt_caching_set: bool = False,
pdf_used: bool = False,
is_vertex_request: bool = False,
user_anthropic_beta_headers: Optional[List[str]] = None,
) -> dict:
betas = set()
if prompt_caching_set:
betas.add("prompt-caching-2024-07-31")
if computer_tool_used:
betas.add("computer-use-2024-10-22")
if pdf_used:
betas.add("pdfs-2024-09-25")
headers = {
"anthropic-version": anthropic_version or "2023-06-01",
"x-api-key": api_key,
"accept": "application/json",
"content-type": "application/json",
}
if user_anthropic_beta_headers is not None:
betas.update(user_anthropic_beta_headers)
# Don't send any beta headers to Vertex, Vertex has failed requests when they are sent
if is_vertex_request is True:
pass
elif len(betas) > 0:
headers["anthropic-beta"] = ",".join(betas)
return headers
def validate_environment(
self,
headers: dict,
model: str,
messages: List[AllMessageValues],
optional_params: dict,
litellm_params: dict,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
) -> Dict:
if api_key is None:
raise litellm.AuthenticationError(
message="Missing Anthropic API Key - A call is being made to anthropic but no key is set either in the environment variables or via params. Please set `ANTHROPIC_API_KEY` in your environment vars",
llm_provider="anthropic",
model=model,
)
tools = optional_params.get("tools")
prompt_caching_set = self.is_cache_control_set(messages=messages)
computer_tool_used = self.is_computer_tool_used(tools=tools)
pdf_used = self.is_pdf_used(messages=messages)
user_anthropic_beta_headers = self._get_user_anthropic_beta_headers(
anthropic_beta_header=headers.get("anthropic-beta")
)
anthropic_headers = self.get_anthropic_headers(
computer_tool_used=computer_tool_used,
prompt_caching_set=prompt_caching_set,
pdf_used=pdf_used,
api_key=api_key,
is_vertex_request=optional_params.get("is_vertex_request", False),
user_anthropic_beta_headers=user_anthropic_beta_headers,
)
headers = {**headers, **anthropic_headers}
return headers
@staticmethod
def get_api_base(api_base: Optional[str] = None) -> Optional[str]:
return (


@ -125,14 +125,22 @@ class AzureOpenAIConfig(BaseConfig):
) -> bool:
"""
- check if api_version is supported for response_format
- returns True if the API version is equal to or newer than the supported version
"""
api_year = int(api_version_year)
api_month = int(api_version_month)
supported_year = int(API_VERSION_YEAR_SUPPORTED_RESPONSE_FORMAT)
supported_month = int(API_VERSION_MONTH_SUPPORTED_RESPONSE_FORMAT)
is_supported = (
int(api_version_year) <= API_VERSION_YEAR_SUPPORTED_RESPONSE_FORMAT
and int(api_version_month) >= API_VERSION_MONTH_SUPPORTED_RESPONSE_FORMAT
)
return is_supported
# If the year is greater than supported year, it's definitely supported
if api_year > supported_year:
return True
# If the year is less than supported year, it's not supported
elif api_year < supported_year:
return False
# If same year, check if month is >= supported month
else:
return api_month >= supported_month
def map_openai_params(
self,
@ -202,6 +210,7 @@ class AzureOpenAIConfig(BaseConfig):
is_response_format_supported_api_version
and _is_response_format_supported_model
)
optional_params = self._add_response_format_to_tools(
optional_params=optional_params,
value=value,
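
For the api_version support check rewritten above, a small worked sketch of the year-then-month comparison; the real supported-version constants are not shown in this hunk, so 2024/08 below is an assumed example, not the actual value:

def is_supported(api_year: int, api_month: int,
                 supported_year: int = 2024, supported_month: int = 8) -> bool:
    # Later year: always supported; earlier year: never; same year: compare months.
    if api_year != supported_year:
        return api_year > supported_year
    return api_month >= supported_month

assert is_supported(2025, 1)      # newer year, earlier month -> supported
assert not is_supported(2024, 6)  # same year, earlier month -> not supported
assert is_supported(2024, 8)      # exactly the assumed supported version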


@ -309,21 +309,30 @@ class BaseAzureLLM(BaseOpenAILLM):
azure_ad_token_provider: Optional[Callable[[], str]] = None
# If we have api_key, then we have higher priority
azure_ad_token = litellm_params.get("azure_ad_token")
tenant_id = litellm_params.get("tenant_id")
client_id = litellm_params.get("client_id")
client_secret = litellm_params.get("client_secret")
azure_username = litellm_params.get("azure_username")
azure_password = litellm_params.get("azure_password")
tenant_id = litellm_params.get("tenant_id", os.getenv("AZURE_TENANT_ID"))
client_id = litellm_params.get("client_id", os.getenv("AZURE_CLIENT_ID"))
client_secret = litellm_params.get(
"client_secret", os.getenv("AZURE_CLIENT_SECRET")
)
azure_username = litellm_params.get(
"azure_username", os.getenv("AZURE_USERNAME")
)
azure_password = litellm_params.get(
"azure_password", os.getenv("AZURE_PASSWORD")
)
max_retries = litellm_params.get("max_retries")
timeout = litellm_params.get("timeout")
if not api_key and tenant_id and client_id and client_secret:
verbose_logger.debug("Using Azure AD Token Provider for Azure Auth")
verbose_logger.debug(
"Using Azure AD Token Provider from Entrata ID for Azure Auth"
)
azure_ad_token_provider = get_azure_ad_token_from_entrata_id(
tenant_id=tenant_id,
client_id=client_id,
client_secret=client_secret,
)
if azure_username and azure_password and client_id:
verbose_logger.debug("Using Azure Username and Password for Azure Auth")
azure_ad_token_provider = get_azure_ad_token_from_username_password(
azure_username=azure_username,
azure_password=azure_password,
@ -331,12 +340,16 @@ class BaseAzureLLM(BaseOpenAILLM):
)
if azure_ad_token is not None and azure_ad_token.startswith("oidc/"):
verbose_logger.debug("Using Azure OIDC Token for Azure Auth")
azure_ad_token = get_azure_ad_token_from_oidc(azure_ad_token)
elif (
not api_key
and azure_ad_token_provider is None
and litellm.enable_azure_ad_token_refresh is True
):
verbose_logger.debug(
"Using Azure AD token provider based on Service Principal with Secret workflow for Azure Auth"
)
try:
azure_ad_token_provider = get_azure_ad_token_provider()
except ValueError:


@ -44,6 +44,19 @@ class BaseLLMModelInfo(ABC):
def get_api_base(api_base: Optional[str] = None) -> Optional[str]:
pass
@abstractmethod
def validate_environment(
self,
headers: dict,
model: str,
messages: List[AllMessageValues],
optional_params: dict,
litellm_params: dict,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
) -> dict:
pass
@staticmethod
@abstractmethod
def get_base_model(model: str) -> Optional[str]:


@ -44,7 +44,18 @@ class AmazonBedrockGlobalConfig:
)
def get_ap_regions(self) -> List[str]:
return ["ap-northeast-1", "ap-northeast-2", "ap-northeast-3", "ap-south-1"]
"""
Source: https://www.aws-services.info/bedrock.html
"""
return [
"ap-northeast-1", # Asia Pacific (Tokyo)
"ap-northeast-2", # Asia Pacific (Seoul)
"ap-northeast-3", # Asia Pacific (Osaka)
"ap-south-1", # Asia Pacific (Mumbai)
"ap-south-2", # Asia Pacific (Hyderabad)
"ap-southeast-1", # Asia Pacific (Singapore)
"ap-southeast-2", # Asia Pacific (Sydney)
]
def get_sa_regions(self) -> List[str]:
return ["sa-east-1"]
@ -54,10 +65,14 @@ class AmazonBedrockGlobalConfig:
Source: https://www.aws-services.info/bedrock.html
"""
return [
"eu-west-1",
"eu-west-2",
"eu-west-3",
"eu-central-1",
"eu-west-1", # Europe (Ireland)
"eu-west-2", # Europe (London)
"eu-west-3", # Europe (Paris)
"eu-central-1", # Europe (Frankfurt)
"eu-central-2", # Europe (Zurich)
"eu-south-1", # Europe (Milan)
"eu-south-2", # Europe (Spain)
"eu-north-1", # Europe (Stockholm)
]
def get_ca_regions(self) -> List[str]:
@ -68,11 +83,11 @@ class AmazonBedrockGlobalConfig:
Source: https://www.aws-services.info/bedrock.html
"""
return [
"us-east-2",
"us-east-1",
"us-west-1",
"us-west-2",
"us-gov-west-1",
"us-east-1", # US East (N. Virginia)
"us-east-2", # US East (Ohio)
"us-west-2", # US West (Oregon)
"us-gov-east-1", # AWS GovCloud (US-East)
"us-gov-west-1", # AWS GovCloud (US-West)
]


@ -0,0 +1,356 @@
import time
from typing import TYPE_CHECKING, Any, AsyncIterator, Iterator, List, Optional, Union
import httpx
import litellm
from litellm.litellm_core_utils.prompt_templates.factory import cohere_messages_pt_v2
from litellm.llms.base_llm.chat.transformation import BaseConfig, BaseLLMException
from litellm.types.llms.cohere import CohereV2ChatResponse
from litellm.types.llms.openai import AllMessageValues, ChatCompletionToolCallChunk
from litellm.types.utils import ModelResponse, Usage
from ..common_utils import CohereError
from ..common_utils import ModelResponseIterator as CohereModelResponseIterator
from ..common_utils import validate_environment as cohere_validate_environment
if TYPE_CHECKING:
from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj
LiteLLMLoggingObj = _LiteLLMLoggingObj
else:
LiteLLMLoggingObj = Any
class CohereV2ChatConfig(BaseConfig):
"""
Configuration class for Cohere's API interface.
Args:
preamble (str, optional): When specified, the default Cohere preamble will be replaced with the provided one.
chat_history (List[Dict[str, str]], optional): A list of previous messages between the user and the model.
generation_id (str, optional): Unique identifier for the generated reply.
response_id (str, optional): Unique identifier for the response.
conversation_id (str, optional): An alternative to chat_history, creates or resumes a persisted conversation.
prompt_truncation (str, optional): Dictates how the prompt will be constructed. Options: 'AUTO', 'AUTO_PRESERVE_ORDER', 'OFF'.
connectors (List[Dict[str, str]], optional): List of connectors (e.g., web-search) to enrich the model's reply.
search_queries_only (bool, optional): When true, the response will only contain a list of generated search queries.
documents (List[Dict[str, str]], optional): A list of relevant documents that the model can cite.
temperature (float, optional): A non-negative float that tunes the degree of randomness in generation.
max_tokens (int, optional): The maximum number of tokens the model will generate as part of the response.
k (int, optional): Ensures only the top k most likely tokens are considered for generation at each step.
p (float, optional): Ensures that only the most likely tokens, with total probability mass of p, are considered for generation.
frequency_penalty (float, optional): Used to reduce repetitiveness of generated tokens.
presence_penalty (float, optional): Used to reduce repetitiveness of generated tokens.
tools (List[Dict[str, str]], optional): A list of available tools (functions) that the model may suggest invoking.
tool_results (List[Dict[str, Any]], optional): A list of results from invoking tools.
seed (int, optional): A seed to assist reproducibility of the model's response.
"""
preamble: Optional[str] = None
chat_history: Optional[list] = None
generation_id: Optional[str] = None
response_id: Optional[str] = None
conversation_id: Optional[str] = None
prompt_truncation: Optional[str] = None
connectors: Optional[list] = None
search_queries_only: Optional[bool] = None
documents: Optional[list] = None
temperature: Optional[int] = None
max_tokens: Optional[int] = None
k: Optional[int] = None
p: Optional[int] = None
frequency_penalty: Optional[int] = None
presence_penalty: Optional[int] = None
tools: Optional[list] = None
tool_results: Optional[list] = None
seed: Optional[int] = None
def __init__(
self,
preamble: Optional[str] = None,
chat_history: Optional[list] = None,
generation_id: Optional[str] = None,
response_id: Optional[str] = None,
conversation_id: Optional[str] = None,
prompt_truncation: Optional[str] = None,
connectors: Optional[list] = None,
search_queries_only: Optional[bool] = None,
documents: Optional[list] = None,
temperature: Optional[int] = None,
max_tokens: Optional[int] = None,
k: Optional[int] = None,
p: Optional[int] = None,
frequency_penalty: Optional[int] = None,
presence_penalty: Optional[int] = None,
tools: Optional[list] = None,
tool_results: Optional[list] = None,
seed: Optional[int] = None,
) -> None:
locals_ = locals()
for key, value in locals_.items():
if key != "self" and value is not None:
setattr(self.__class__, key, value)
def validate_environment(
self,
headers: dict,
model: str,
messages: List[AllMessageValues],
optional_params: dict,
litellm_params: dict,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
) -> dict:
return cohere_validate_environment(
headers=headers,
model=model,
messages=messages,
optional_params=optional_params,
api_key=api_key,
)
def get_supported_openai_params(self, model: str) -> List[str]:
return [
"stream",
"temperature",
"max_tokens",
"top_p",
"frequency_penalty",
"presence_penalty",
"stop",
"n",
"tools",
"tool_choice",
"seed",
"extra_headers",
]
def map_openai_params(
self,
non_default_params: dict,
optional_params: dict,
model: str,
drop_params: bool,
) -> dict:
for param, value in non_default_params.items():
if param == "stream":
optional_params["stream"] = value
if param == "temperature":
optional_params["temperature"] = value
if param == "max_tokens":
optional_params["max_tokens"] = value
if param == "n":
optional_params["num_generations"] = value
if param == "top_p":
optional_params["p"] = value
if param == "frequency_penalty":
optional_params["frequency_penalty"] = value
if param == "presence_penalty":
optional_params["presence_penalty"] = value
if param == "stop":
optional_params["stop_sequences"] = value
if param == "tools":
optional_params["tools"] = value
if param == "seed":
optional_params["seed"] = value
return optional_params
def transform_request(
self,
model: str,
messages: List[AllMessageValues],
optional_params: dict,
litellm_params: dict,
headers: dict,
) -> dict:
## Load Config
for k, v in litellm.CohereChatConfig.get_config().items():
if (
k not in optional_params
): # completion(top_k=3) > cohere_config(top_k=3) <- allows for dynamic variables to be passed in
optional_params[k] = v
most_recent_message, chat_history = cohere_messages_pt_v2(
messages=messages, model=model, llm_provider="cohere_chat"
)
## Handle Tool Calling
if "tools" in optional_params:
_is_function_call = True
cohere_tools = self._construct_cohere_tool(tools=optional_params["tools"])
optional_params["tools"] = cohere_tools
if isinstance(most_recent_message, dict):
optional_params["tool_results"] = [most_recent_message]
elif isinstance(most_recent_message, str):
optional_params["message"] = most_recent_message
## if the last chat history message is from the 'USER' and 'tool_results' are given, set force_single_step=True - else the cohere api fails
if len(chat_history) > 0 and chat_history[-1]["role"] == "USER":
optional_params["force_single_step"] = True
return optional_params
def transform_response(
self,
model: str,
raw_response: httpx.Response,
model_response: ModelResponse,
logging_obj: LiteLLMLoggingObj,
request_data: dict,
messages: List[AllMessageValues],
optional_params: dict,
litellm_params: dict,
encoding: Any,
api_key: Optional[str] = None,
json_mode: Optional[bool] = None,
) -> ModelResponse:
try:
raw_response_json = raw_response.json()
except Exception:
raise CohereError(
message=raw_response.text, status_code=raw_response.status_code
)
try:
cohere_v2_chat_response = CohereV2ChatResponse(**raw_response_json) # type: ignore
except Exception:
raise CohereError(message=raw_response.text, status_code=422)
cohere_content = cohere_v2_chat_response["message"].get("content", None)
if cohere_content is not None:
model_response.choices[0].message.content = "".join( # type: ignore
[
content.get("text", "")
for content in cohere_content
if content is not None
]
)
## ADD CITATIONS
if "citations" in cohere_v2_chat_response:
setattr(model_response, "citations", cohere_v2_chat_response["citations"])
## Tool calling response
cohere_tools_response = cohere_v2_chat_response["message"].get("tool_calls", [])
if cohere_tools_response is not None and cohere_tools_response != []:
# convert cohere_tools_response to OpenAI response format
tool_calls: List[ChatCompletionToolCallChunk] = []
for index, tool in enumerate(cohere_tools_response):
tool_call: ChatCompletionToolCallChunk = {
**tool, # type: ignore
"index": index,
}
tool_calls.append(tool_call)
_message = litellm.Message(
tool_calls=tool_calls,
content=None,
)
model_response.choices[0].message = _message # type: ignore
## CALCULATING USAGE - use cohere `usage.tokens` for returning usage
token_usage = cohere_v2_chat_response["usage"].get("tokens", {})
prompt_tokens = token_usage.get("input_tokens", 0)
completion_tokens = token_usage.get("output_tokens", 0)
model_response.created = int(time.time())
model_response.model = model
usage = Usage(
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
total_tokens=prompt_tokens + completion_tokens,
)
setattr(model_response, "usage", usage)
return model_response
def _construct_cohere_tool(
self,
tools: Optional[list] = None,
):
if tools is None:
tools = []
cohere_tools = []
for tool in tools:
cohere_tool = self._translate_openai_tool_to_cohere(tool)
cohere_tools.append(cohere_tool)
return cohere_tools
def _translate_openai_tool_to_cohere(
self,
openai_tool: dict,
):
# cohere tools look like this
"""
{
"name": "query_daily_sales_report",
"description": "Connects to a database to retrieve overall sales volumes and sales information for a given day.",
"parameter_definitions": {
"day": {
"description": "Retrieves sales data for this day, formatted as YYYY-MM-DD.",
"type": "str",
"required": True
}
}
}
"""
# OpenAI tools look like this
"""
{
"type": "function",
"function": {
"name": "get_current_weather",
"description": "Get the current weather in a given location",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state, e.g. San Francisco, CA",
},
"unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
},
"required": ["location"],
},
},
}
"""
cohere_tool = {
"name": openai_tool["function"]["name"],
"description": openai_tool["function"]["description"],
"parameter_definitions": {},
}
for param_name, param_def in openai_tool["function"]["parameters"][
"properties"
].items():
required_params = (
openai_tool.get("function", {})
.get("parameters", {})
.get("required", [])
)
cohere_param_def = {
"description": param_def.get("description", ""),
"type": param_def.get("type", ""),
"required": param_name in required_params,
}
cohere_tool["parameter_definitions"][param_name] = cohere_param_def
return cohere_tool
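# For example, the OpenAI weather tool shown in the docstring above translates to
# roughly the following Cohere tool ("required" is derived from the OpenAI
# "required" list, and a missing description defaults to ""):
#   {
#       "name": "get_current_weather",
#       "description": "Get the current weather in a given location",
#       "parameter_definitions": {
#           "location": {"description": "The city and state, e.g. San Francisco, CA", "type": "string", "required": True},
#           "unit": {"description": "", "type": "string", "required": False},
#       },
#   }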
def get_model_response_iterator(
self,
streaming_response: Union[Iterator[str], AsyncIterator[str], ModelResponse],
sync_stream: bool,
json_mode: Optional[bool] = False,
):
return CohereModelResponseIterator(
streaming_response=streaming_response,
sync_stream=sync_stream,
json_mode=json_mode,
)
def get_error_class(
self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
) -> BaseLLMException:
return CohereError(status_code=status_code, message=error_message)
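A rough usage sketch of the parameter mapping above (model name and values are illustrative; this assumes the class is the `CohereV2ChatConfig` imported elsewhere in this commit):

from litellm.llms.cohere.chat.v2_transformation import CohereV2ChatConfig

config = CohereV2ChatConfig()
mapped = config.map_openai_params(
    non_default_params={"temperature": 0.3, "top_p": 0.9, "n": 2, "stop": ["\n\n"]},
    optional_params={},
    model="command-r-plus",
    drop_params=False,
)
# mapped == {"temperature": 0.3, "p": 0.9, "num_generations": 2, "stop_sequences": ["\n\n"]}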

View file

@ -104,19 +104,28 @@ class ModelResponseIterator:
raise RuntimeError(f"Error receiving chunk from stream: {e}")
try:
str_line = chunk
if isinstance(chunk, bytes): # Handle binary data
str_line = chunk.decode("utf-8") # Convert bytes to string
index = str_line.find("data:")
if index != -1:
str_line = str_line[index:]
data_json = json.loads(str_line)
return self.chunk_parser(chunk=data_json)
return self.convert_str_chunk_to_generic_chunk(chunk=chunk)
except StopIteration:
raise StopIteration
except ValueError as e:
raise RuntimeError(f"Error parsing chunk: {e},\nReceived chunk: {chunk}")
def convert_str_chunk_to_generic_chunk(self, chunk: str) -> GenericStreamingChunk:
"""
Convert a string chunk to a GenericStreamingChunk
Note: This is used for Cohere pass-through streaming logging
"""
str_line = chunk
if isinstance(chunk, bytes): # Handle binary data
str_line = chunk.decode("utf-8") # Convert bytes to string
index = str_line.find("data:")
if index != -1:
str_line = str_line[index:]
data_json = json.loads(str_line)
return self.chunk_parser(chunk=data_json)
# Async iterator
def __aiter__(self):
self.async_response_iterator = self.streaming_response.__aiter__()
@ -131,15 +140,7 @@ class ModelResponseIterator:
raise RuntimeError(f"Error receiving chunk from stream: {e}")
try:
str_line = chunk
if isinstance(chunk, bytes): # Handle binary data
str_line = chunk.decode("utf-8") # Convert bytes to string
index = str_line.find("data:")
if index != -1:
str_line = str_line[index:]
data_json = json.loads(str_line)
return self.chunk_parser(chunk=data_json)
return self.convert_str_chunk_to_generic_chunk(chunk=chunk)
except StopAsyncIteration:
raise StopAsyncIteration
except ValueError as e:

View file

@ -389,7 +389,7 @@ class OpenAIGPTConfig(BaseLLMModelInfo, BaseConfig):
)
@staticmethod
def get_base_model(model: str) -> str:
def get_base_model(model: Optional[str] = None) -> Optional[str]:
return model
def get_model_response_iterator(

View file

@ -1,6 +1,7 @@
from typing import List, Optional
from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import AllMessageValues
from ..base_llm.base_utils import BaseLLMModelInfo
from ..base_llm.chat.transformation import BaseLLMException
@ -11,6 +12,26 @@ class TopazException(BaseLLMException):
class TopazModelInfo(BaseLLMModelInfo):
def validate_environment(
self,
headers: dict,
model: str,
messages: List[AllMessageValues],
optional_params: dict,
litellm_params: dict,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
) -> dict:
if api_key is None:
raise ValueError(
"API key is required for Topaz image variations. Set via `TOPAZ_API_KEY` or `api_key=..`"
)
return {
# "Content-Type": "multipart/form-data",
"Accept": "image/jpeg",
"X-API-Key": api_key,
}
def get_models(
self, api_key: Optional[str] = None, api_base: Optional[str] = None
) -> List[str]:

View file

@ -10,10 +10,7 @@ from litellm.llms.base_llm.chat.transformation import (
BaseLLMException,
LiteLLMLoggingObj,
)
from litellm.types.llms.openai import (
AllMessageValues,
OpenAIImageVariationOptionalParams,
)
from litellm.types.llms.openai import OpenAIImageVariationOptionalParams
from litellm.types.utils import (
FileTypes,
HttpHandlerRequestFields,
@ -22,35 +19,15 @@ from litellm.types.utils import (
)
from ...base_llm.image_variations.transformation import BaseImageVariationConfig
from ..common_utils import TopazException
from ..common_utils import TopazException, TopazModelInfo
class TopazImageVariationConfig(BaseImageVariationConfig):
class TopazImageVariationConfig(TopazModelInfo, BaseImageVariationConfig):
def get_supported_openai_params(
self, model: str
) -> List[OpenAIImageVariationOptionalParams]:
return ["response_format", "size"]
def validate_environment(
self,
headers: dict,
model: str,
messages: List[AllMessageValues],
optional_params: dict,
litellm_params: dict,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
) -> dict:
if api_key is None:
raise ValueError(
"API key is required for Topaz image variations. Set via `TOPAZ_API_KEY` or `api_key=..`"
)
return {
# "Content-Type": "multipart/form-data",
"Accept": "image/jpeg",
"X-API-Key": api_key,
}
def get_complete_url(
self,
api_base: Optional[str],

View file

@ -0,0 +1,75 @@
from typing import List, Optional, Union
import httpx
import litellm
from litellm.llms.base_llm.base_utils import BaseLLMModelInfo
from litellm.llms.base_llm.chat.transformation import BaseLLMException
from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import AllMessageValues
from litellm.utils import _add_path_to_api_base
class VLLMError(BaseLLMException):
pass
class VLLMModelInfo(BaseLLMModelInfo):
def validate_environment(
self,
headers: dict,
model: str,
messages: List[AllMessageValues],
optional_params: dict,
litellm_params: dict,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
) -> dict:
"""Google AI Studio sends api key in query params"""
return headers
@staticmethod
def get_api_base(api_base: Optional[str] = None) -> Optional[str]:
api_base = api_base or get_secret_str("VLLM_API_BASE")
if api_base is None:
raise ValueError(
"VLLM_API_BASE is not set. Please set the environment variable, to use VLLM's pass-through - `{LITELLM_API_BASE}/vllm/{endpoint}`."
)
return api_base
@staticmethod
def get_api_key(api_key: Optional[str] = None) -> Optional[str]:
return None
@staticmethod
def get_base_model(model: str) -> Optional[str]:
return model
def get_models(
self, api_key: Optional[str] = None, api_base: Optional[str] = None
) -> List[str]:
api_base = VLLMModelInfo.get_api_base(api_base)
api_key = VLLMModelInfo.get_api_key(api_key)
endpoint = "/v1/models"
if api_base is None:
raise ValueError(
"VLLM_API_BASE is not set. Please set the environment variable to query vLLM's `/models` endpoint."
)
url = _add_path_to_api_base(api_base, endpoint)
response = litellm.module_level_client.get(
url=url,
)
response.raise_for_status()
models = response.json()["data"]
return [model["id"] for model in models]
def get_error_class(
self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
) -> BaseLLMException:
return VLLMError(
status_code=status_code, message=error_message, headers=headers
)
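Functionally, the `get_models` above reduces to this sketch (server URL illustrative; plain `httpx` stands in for `litellm.module_level_client`):

import httpx

api_base = "http://localhost:8000"  # stand-in for VLLM_API_BASE
resp = httpx.get(f"{api_base}/v1/models")
resp.raise_for_status()
model_ids = [m["id"] for m in resp.json()["data"]]  # e.g. ["facebook/opt-125m"]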

View file

@ -5,9 +5,29 @@ import httpx
import litellm
from litellm.llms.base_llm.base_utils import BaseLLMModelInfo
from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import AllMessageValues
class XAIModelInfo(BaseLLMModelInfo):
def validate_environment(
self,
headers: dict,
model: str,
messages: List[AllMessageValues],
optional_params: dict,
litellm_params: dict,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
) -> dict:
if api_key is not None:
headers["Authorization"] = f"Bearer {api_key}"
# Ensure Content-Type is set to application/json
if "content-type" not in headers and "Content-Type" not in headers:
headers["Content-Type"] = "application/json"
return headers
@staticmethod
def get_api_base(api_base: Optional[str] = None) -> Optional[str]:
return api_base or get_secret_str("XAI_API_BASE") or "https://api.x.ai"
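A small sketch of the headers produced by the `validate_environment` above (key and model values are illustrative; assumes `XAIModelInfo` is concrete as defined in this file):

headers = XAIModelInfo().validate_environment(
    headers={},
    model="grok-2",
    messages=[],
    optional_params={},
    litellm_params={},
    api_key="xai-example-key",
)
# headers == {"Authorization": "Bearer xai-example-key", "Content-Type": "application/json"}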

View file

@ -954,7 +954,11 @@ def completion( # type: ignore # noqa: PLR0915
non_default_params = get_non_default_completion_params(kwargs=kwargs)
litellm_params = {} # used to prevent unbound var errors
## PROMPT MANAGEMENT HOOKS ##
if isinstance(litellm_logging_obj, LiteLLMLoggingObj) and prompt_id is not None:
if isinstance(litellm_logging_obj, LiteLLMLoggingObj) and (
litellm_logging_obj.should_run_prompt_management_hooks(
prompt_id=prompt_id, non_default_params=non_default_params
)
):
(
model,
messages,
@ -2654,9 +2658,9 @@ def completion( # type: ignore # noqa: PLR0915
"aws_region_name" not in optional_params
or optional_params["aws_region_name"] is None
):
optional_params[
"aws_region_name"
] = aws_bedrock_client.meta.region_name
optional_params["aws_region_name"] = (
aws_bedrock_client.meta.region_name
)
bedrock_route = BedrockModelInfo.get_bedrock_route(model)
if bedrock_route == "converse":
@ -4363,9 +4367,9 @@ def adapter_completion(
new_kwargs = translation_obj.translate_completion_input_params(kwargs=kwargs)
response: Union[ModelResponse, CustomStreamWrapper] = completion(**new_kwargs) # type: ignore
translated_response: Optional[
Union[BaseModel, AdapterCompletionStreamWrapper]
] = None
translated_response: Optional[Union[BaseModel, AdapterCompletionStreamWrapper]] = (
None
)
if isinstance(response, ModelResponse):
translated_response = translation_obj.translate_completion_output_params(
response=response
@ -5785,9 +5789,9 @@ def stream_chunk_builder( # noqa: PLR0915
]
if len(content_chunks) > 0:
response["choices"][0]["message"][
"content"
] = processor.get_combined_content(content_chunks)
response["choices"][0]["message"]["content"] = (
processor.get_combined_content(content_chunks)
)
reasoning_chunks = [
chunk
@ -5798,9 +5802,9 @@ def stream_chunk_builder( # noqa: PLR0915
]
if len(reasoning_chunks) > 0:
response["choices"][0]["message"][
"reasoning_content"
] = processor.get_combined_reasoning_content(reasoning_chunks)
response["choices"][0]["message"]["reasoning_content"] = (
processor.get_combined_reasoning_content(reasoning_chunks)
)
audio_chunks = [
chunk

View file

@ -2669,7 +2669,7 @@
"max_output_tokens": 4096,
"input_cost_per_token": 0.00000008,
"input_cost_per_audio_token": 0.000004,
"output_cost_per_token": 0.00032,
"output_cost_per_token": 0.00000032,
"litellm_provider": "azure_ai",
"mode": "chat",
"supports_audio_input": true,

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75a5453f51d60261.js"/><script src="/ui/_next/static/chunks/fd9d1056-524b80e1a6b8bb06.js" async=""></script><script src="/ui/_next/static/chunks/117-87ec698bfca6820e.js" async=""></script><script src="/ui/_next/static/chunks/main-app-475d6efe4080647d.js" async=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-42372ed130431b0a.js" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75a5453f51d60261.js" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/a34f9d1faa5f3315-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"style\"]\n3:HL[\"/ui/_next/static/css/61b39adb1cb75213.css\",\"style\"]\n"])</script><script>self.__next_f.push([1,"4:I[12846,[],\"\"]\n6:I[19107,[],\"ClientPageRoot\"]\n7:I[98621,[\"665\",\"static/chunks/3014691f-0b72c78cfebbd712.js\",\"990\",\"static/chunks/13b76428-ebdf3012af0e4489.js\",\"42\",\"static/chunks/42-6810261f4d6c8bbf.js\",\"261\",\"static/chunks/261-d4b99bc9f53d4ef3.js\",\"899\",\"static/chunks/899-8d832fe7c09b2afe.js\",\"817\",\"static/chunks/817-c8c94d51d28af935.js\",\"250\",\"static/chunks/250-f21e8c1de1717077.js\",\"699\",\"static/chunks/699-b82671c2a3a100c7.js\",\"931\",\"static/chunks/app/page-0f39fb1d0bde3c55.js\"],\"default\",1]\n8:I[4707,[],\"\"]\n9:I[36423,[],\"\"]\nb:I[61060,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"0:[\"$\",\"$L4\",null,{\"buildId\":\"eKo9wJb6ZFjAy85GaOeQ6\",\"assetPrefix\":\"/ui\",\"urlParts\":[\"\",\"\"],\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[[\"$L5\",[\"$\",\"$L6\",null,{\"props\":{\"params\":{},\"searchParams\":{}},\"Component\":\"$7\"}],null],null],null]},[[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}],[\"$\",\"link\",\"1\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/61b39adb1cb75213.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}]],[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_cf7686\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI 
Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[]}]}]}]],null],null],\"couldBeIntercepted\":false,\"initialHead\":[null,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script></body></html>
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75a5453f51d60261.js"/><script src="/ui/_next/static/chunks/fd9d1056-524b80e1a6b8bb06.js" async=""></script><script src="/ui/_next/static/chunks/117-87ec698bfca6820e.js" async=""></script><script src="/ui/_next/static/chunks/main-app-475d6efe4080647d.js" async=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-42372ed130431b0a.js" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75a5453f51d60261.js" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/a34f9d1faa5f3315-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"style\"]\n3:HL[\"/ui/_next/static/css/61b39adb1cb75213.css\",\"style\"]\n"])</script><script>self.__next_f.push([1,"4:I[12846,[],\"\"]\n6:I[19107,[],\"ClientPageRoot\"]\n7:I[37185,[\"665\",\"static/chunks/3014691f-0b72c78cfebbd712.js\",\"990\",\"static/chunks/13b76428-ebdf3012af0e4489.js\",\"42\",\"static/chunks/42-6810261f4d6c8bbf.js\",\"261\",\"static/chunks/261-d4b99bc9f53d4ef3.js\",\"899\",\"static/chunks/899-8d832fe7c09b2afe.js\",\"817\",\"static/chunks/817-c8c94d51d28af935.js\",\"250\",\"static/chunks/250-ef859ae76fcc7788.js\",\"699\",\"static/chunks/699-b82671c2a3a100c7.js\",\"931\",\"static/chunks/app/page-0f22a8aad3780430.js\"],\"default\",1]\n8:I[4707,[],\"\"]\n9:I[36423,[],\"\"]\nb:I[61060,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"0:[\"$\",\"$L4\",null,{\"buildId\":\"cxKWUnb1loD53gKRUqiFm\",\"assetPrefix\":\"/ui\",\"urlParts\":[\"\",\"\"],\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[[\"$L5\",[\"$\",\"$L6\",null,{\"props\":{\"params\":{},\"searchParams\":{}},\"Component\":\"$7\"}],null],null],null]},[[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}],[\"$\",\"link\",\"1\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/61b39adb1cb75213.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}]],[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_cf7686\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI 
Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[]}]}]}]],null],null],\"couldBeIntercepted\":false,\"initialHead\":[null,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script></body></html>

View file

@ -1,7 +1,7 @@
2:I[19107,[],"ClientPageRoot"]
3:I[98621,["665","static/chunks/3014691f-0b72c78cfebbd712.js","990","static/chunks/13b76428-ebdf3012af0e4489.js","42","static/chunks/42-6810261f4d6c8bbf.js","261","static/chunks/261-d4b99bc9f53d4ef3.js","899","static/chunks/899-8d832fe7c09b2afe.js","817","static/chunks/817-c8c94d51d28af935.js","250","static/chunks/250-f21e8c1de1717077.js","699","static/chunks/699-b82671c2a3a100c7.js","931","static/chunks/app/page-0f39fb1d0bde3c55.js"],"default",1]
3:I[37185,["665","static/chunks/3014691f-0b72c78cfebbd712.js","990","static/chunks/13b76428-ebdf3012af0e4489.js","42","static/chunks/42-6810261f4d6c8bbf.js","261","static/chunks/261-d4b99bc9f53d4ef3.js","899","static/chunks/899-8d832fe7c09b2afe.js","817","static/chunks/817-c8c94d51d28af935.js","250","static/chunks/250-ef859ae76fcc7788.js","699","static/chunks/699-b82671c2a3a100c7.js","931","static/chunks/app/page-0f22a8aad3780430.js"],"default",1]
4:I[4707,[],""]
5:I[36423,[],""]
0:["eKo9wJb6ZFjAy85GaOeQ6",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/61b39adb1cb75213.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
0:["cxKWUnb1loD53gKRUqiFm",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/61b39adb1cb75213.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

View file

@ -1,7 +1,7 @@
2:I[19107,[],"ClientPageRoot"]
3:I[52829,["42","static/chunks/42-6810261f4d6c8bbf.js","261","static/chunks/261-d4b99bc9f53d4ef3.js","250","static/chunks/250-f21e8c1de1717077.js","699","static/chunks/699-b82671c2a3a100c7.js","418","static/chunks/app/model_hub/page-068a441595bd0fc3.js"],"default",1]
3:I[52829,["42","static/chunks/42-6810261f4d6c8bbf.js","261","static/chunks/261-d4b99bc9f53d4ef3.js","250","static/chunks/250-ef859ae76fcc7788.js","699","static/chunks/699-b82671c2a3a100c7.js","418","static/chunks/app/model_hub/page-068a441595bd0fc3.js"],"default",1]
4:I[4707,[],""]
5:I[36423,[],""]
0:["eKo9wJb6ZFjAy85GaOeQ6",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/61b39adb1cb75213.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
0:["cxKWUnb1loD53gKRUqiFm",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/61b39adb1cb75213.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

File diff suppressed because one or more lines are too long

View file

@ -1,7 +1,7 @@
2:I[19107,[],"ClientPageRoot"]
3:I[12011,["665","static/chunks/3014691f-0b72c78cfebbd712.js","42","static/chunks/42-6810261f4d6c8bbf.js","899","static/chunks/899-8d832fe7c09b2afe.js","250","static/chunks/250-f21e8c1de1717077.js","461","static/chunks/app/onboarding/page-957334f0c5d5df41.js"],"default",1]
3:I[12011,["665","static/chunks/3014691f-0b72c78cfebbd712.js","42","static/chunks/42-6810261f4d6c8bbf.js","899","static/chunks/899-8d832fe7c09b2afe.js","250","static/chunks/250-ef859ae76fcc7788.js","461","static/chunks/app/onboarding/page-957334f0c5d5df41.js"],"default",1]
4:I[4707,[],""]
5:I[36423,[],""]
0:["eKo9wJb6ZFjAy85GaOeQ6",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/61b39adb1cb75213.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
0:["cxKWUnb1loD53gKRUqiFm",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/61b39adb1cb75213.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

View file

@ -33,6 +33,7 @@ model_list:
litellm_settings:
num_retries: 0
callbacks: ["prometheus"]
check_provider_endpoint: true
files_settings:
- custom_llm_provider: gemini

View file

@ -317,6 +317,8 @@ class LiteLLMRoutes(enum.Enum):
"/openai",
"/assemblyai",
"/eu.assemblyai",
"/vllm",
"/mistral",
]
anthropic_routes = [

View file

@ -1,11 +1,12 @@
# What is this?
## Common checks for /v1/models and `/model/info`
import copy
from typing import Dict, List, Optional, Set
import litellm
from litellm._logging import verbose_proxy_logger
from litellm.proxy._types import SpecialModelNames, UserAPIKeyAuth
from litellm.router import Router
from litellm.types.router import LiteLLM_Params
from litellm.utils import get_valid_models
@ -23,15 +24,20 @@ def _check_wildcard_routing(model: str) -> bool:
return False
def get_provider_models(provider: str) -> Optional[List[str]]:
def get_provider_models(
provider: str, litellm_params: Optional[LiteLLM_Params] = None
) -> Optional[List[str]]:
"""
Returns the list of known models by provider
"""
if provider == "*":
return get_valid_models()
return get_valid_models(litellm_params=litellm_params)
if provider in litellm.models_by_provider:
provider_models = copy.deepcopy(litellm.models_by_provider[provider])
provider_models = get_valid_models(
custom_llm_provider=provider, litellm_params=litellm_params
)
# provider_models = copy.deepcopy(litellm.models_by_provider[provider])
for idx, _model in enumerate(provider_models):
if provider not in _model:
provider_models[idx] = f"{provider}/{_model}"
@ -118,6 +124,7 @@ def get_complete_model_list(
user_model: Optional[str],
infer_model_from_keys: Optional[bool],
return_wildcard_routes: Optional[bool] = False,
llm_router: Optional[Router] = None,
) -> List[str]:
"""Logic for returning complete model list for a given key + team pair"""
@ -143,19 +150,25 @@ def get_complete_model_list(
unique_models.update(valid_models)
all_wildcard_models = _get_wildcard_models(
unique_models=unique_models, return_wildcard_routes=return_wildcard_routes
unique_models=unique_models,
return_wildcard_routes=return_wildcard_routes,
llm_router=llm_router,
)
return list(unique_models) + all_wildcard_models
def get_known_models_from_wildcard(wildcard_model: str) -> List[str]:
def get_known_models_from_wildcard(
wildcard_model: str, litellm_params: Optional[LiteLLM_Params] = None
) -> List[str]:
try:
provider, model = wildcard_model.split("/", 1)
except ValueError: # safely fail
return []
# get all known provider models
wildcard_models = get_provider_models(provider=provider)
wildcard_models = get_provider_models(
provider=provider, litellm_params=litellm_params
)
if wildcard_models is None:
return []
if model == "*":
@ -172,7 +185,9 @@ def get_known_models_from_wildcard(wildcard_model: str) -> List[str]:
def _get_wildcard_models(
unique_models: Set[str], return_wildcard_routes: Optional[bool] = False
unique_models: Set[str],
return_wildcard_routes: Optional[bool] = False,
llm_router: Optional[Router] = None,
) -> List[str]:
models_to_remove = set()
all_wildcard_models = []
@ -183,12 +198,25 @@ def _get_wildcard_models(
): # will add the wildcard route to the list eg: anthropic/*.
all_wildcard_models.append(model)
# get all known provider models
wildcard_models = get_known_models_from_wildcard(wildcard_model=model)
## get litellm params from model
if llm_router is not None:
model_list = llm_router.get_model_list(model_name=model)
if model_list is not None:
for router_model in model_list:
wildcard_models = get_known_models_from_wildcard(
wildcard_model=model,
litellm_params=LiteLLM_Params(
**router_model["litellm_params"] # type: ignore
),
)
all_wildcard_models.extend(wildcard_models)
else:
# get all known provider models
wildcard_models = get_known_models_from_wildcard(wildcard_model=model)
if wildcard_models is not None:
models_to_remove.add(model)
all_wildcard_models.extend(wildcard_models)
if wildcard_models is not None:
models_to_remove.add(model)
all_wildcard_models.extend(wildcard_models)
for model in models_to_remove:
unique_models.remove(model)
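Conceptually, the wildcard expansion above reduces to this simplified sketch (provider and model names are illustrative; only the `provider/*` case is shown, and the real code resolves the model list via `get_provider_models` with the router's litellm_params):

def expand_wildcard(wildcard_model: str, provider_models: dict) -> list:
    # e.g. "anthropic/*" -> ["anthropic/claude-3-haiku", "anthropic/claude-3-opus"]
    try:
        provider, model = wildcard_model.split("/", 1)
    except ValueError:
        return []
    known = provider_models.get(provider, [])
    if model != "*":
        return []  # partial wildcards are handled separately in the real code
    return [m if provider in m else f"{provider}/{m}" for m in known]

print(expand_wildcard("anthropic/*", {"anthropic": ["claude-3-haiku", "claude-3-opus"]}))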

View file

@ -12,7 +12,7 @@ class X42PromptManagement(CustomPromptManagement):
model: str,
messages: List[AllMessageValues],
non_default_params: dict,
prompt_id: str,
prompt_id: Optional[str],
prompt_variables: Optional[dict],
dynamic_callback_params: StandardCallbackDynamicParams,
) -> Tuple[str, List[AllMessageValues], dict]:

View file

@ -6,6 +6,7 @@ Provider-specific Pass-Through Endpoints
Use litellm with Anthropic SDK, Vertex AI SDK, Cohere SDK, etc.
"""
import os
from typing import Optional
import httpx
@ -43,6 +44,84 @@ def create_request_copy(request: Request):
}
async def llm_passthrough_factory_proxy_route(
custom_llm_provider: str,
endpoint: str,
request: Request,
fastapi_response: Response,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
"""
Factory function for creating pass-through endpoints for LLM providers.
"""
from litellm.types.utils import LlmProviders
from litellm.utils import ProviderConfigManager
provider_config = ProviderConfigManager.get_provider_model_info(
provider=LlmProviders(custom_llm_provider),
model=None,
)
if provider_config is None:
raise HTTPException(
status_code=404, detail=f"Provider {custom_llm_provider} not found"
)
base_target_url = provider_config.get_api_base()
if base_target_url is None:
raise HTTPException(
status_code=404, detail=f"Provider {custom_llm_provider} api base not found"
)
encoded_endpoint = httpx.URL(endpoint).path
# Ensure endpoint starts with '/' for proper URL construction
if not encoded_endpoint.startswith("/"):
encoded_endpoint = "/" + encoded_endpoint
# Construct the full target URL using httpx
base_url = httpx.URL(base_target_url)
updated_url = base_url.copy_with(path=encoded_endpoint)
# Add or update query parameters
provider_api_key = passthrough_endpoint_router.get_credentials(
custom_llm_provider=custom_llm_provider,
region_name=None,
)
auth_headers = provider_config.validate_environment(
headers={},
model="",
messages=[],
optional_params={},
litellm_params={},
api_key=provider_api_key,
api_base=base_target_url,
)
## check for streaming
is_streaming_request = False
# streaming requests set 'stream' = True in the body
if request.method == "POST":
_request_body = await request.json()
if _request_body.get("stream"):
is_streaming_request = True
## CREATE PASS-THROUGH
endpoint_func = create_pass_through_route(
endpoint=endpoint,
target=str(updated_url),
custom_headers=auth_headers,
) # dynamically construct pass-through endpoint based on incoming path
received_value = await endpoint_func(
request,
fastapi_response,
user_api_key_dict,
stream=is_streaming_request, # type: ignore
)
return received_value
@router.api_route(
"/gemini/{endpoint:path}",
methods=["GET", "POST", "PUT", "DELETE", "PATCH"],
@ -162,6 +241,84 @@ async def cohere_proxy_route(
return received_value
@router.api_route(
"/vllm/{endpoint:path}",
methods=["GET", "POST", "PUT", "DELETE", "PATCH"],
tags=["VLLM Pass-through", "pass-through"],
)
async def vllm_proxy_route(
endpoint: str,
request: Request,
fastapi_response: Response,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
"""
[Docs](https://docs.litellm.ai/docs/pass_through/vllm)
"""
return await llm_passthrough_factory_proxy_route(
endpoint=endpoint,
request=request,
fastapi_response=fastapi_response,
user_api_key_dict=user_api_key_dict,
custom_llm_provider="vllm",
)
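With the route above registered, clients can reach the vLLM backend through the proxy; a sketch (proxy URL and virtual key are illustrative):

import httpx

resp = httpx.get(
    "http://localhost:4000/vllm/v1/models",       # LiteLLM proxy
    headers={"Authorization": "Bearer sk-1234"},  # LiteLLM virtual key
)
print(resp.json())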
@router.api_route(
"/mistral/{endpoint:path}",
methods=["GET", "POST", "PUT", "DELETE", "PATCH"],
tags=["Mistral Pass-through", "pass-through"],
)
async def mistral_proxy_route(
endpoint: str,
request: Request,
fastapi_response: Response,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
"""
[Docs](https://docs.litellm.ai/docs/pass_through/mistral)
"""
base_target_url = os.getenv("MISTRAL_API_BASE") or "https://api.mistral.ai"
encoded_endpoint = httpx.URL(endpoint).path
# Ensure endpoint starts with '/' for proper URL construction
if not encoded_endpoint.startswith("/"):
encoded_endpoint = "/" + encoded_endpoint
# Construct the full target URL using httpx
base_url = httpx.URL(base_target_url)
updated_url = base_url.copy_with(path=encoded_endpoint)
# Add or update query parameters
mistral_api_key = passthrough_endpoint_router.get_credentials(
custom_llm_provider="mistral",
region_name=None,
)
## check for streaming
is_streaming_request = False
# mistral is streaming when 'stream' = True is in the body
if request.method == "POST":
_request_body = await request.json()
if _request_body.get("stream"):
is_streaming_request = True
## CREATE PASS-THROUGH
endpoint_func = create_pass_through_route(
endpoint=endpoint,
target=str(updated_url),
custom_headers={"Authorization": "Bearer {}".format(mistral_api_key)},
) # dynamically construct pass-through endpoint based on incoming path
received_value = await endpoint_func(
request,
fastapi_response,
user_api_key_dict,
stream=is_streaming_request, # type: ignore
)
return received_value
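Similarly, a sketch of calling Mistral through the new pass-through (proxy URL, key, and model are illustrative):

import httpx

resp = httpx.post(
    "http://localhost:4000/mistral/v1/chat/completions",  # LiteLLM proxy
    headers={"Authorization": "Bearer sk-1234"},           # LiteLLM virtual key
    json={
        "model": "mistral-small-latest",
        "messages": [{"role": "user", "content": "Hello"}],
    },
)
print(resp.json()["choices"][0]["message"]["content"])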
@router.api_route(
"/anthropic/{endpoint:path}",
methods=["GET", "POST", "PUT", "DELETE", "PATCH"],

View file

@ -0,0 +1,219 @@
import json
from datetime import datetime
from typing import TYPE_CHECKING, Any, List, Optional, Union
import httpx
import litellm
from litellm._logging import verbose_proxy_logger
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.litellm_core_utils.litellm_logging import (
get_standard_logging_object_payload,
)
from litellm.llms.base_llm.chat.transformation import BaseConfig
from litellm.proxy._types import PassThroughEndpointLoggingTypedDict
from litellm.proxy.auth.auth_utils import get_end_user_id_from_request_body
from litellm.proxy.pass_through_endpoints.types import PassthroughStandardLoggingPayload
from litellm.types.utils import LlmProviders, ModelResponse, TextCompletionResponse
if TYPE_CHECKING:
from ..success_handler import PassThroughEndpointLogging
from ..types import EndpointType
else:
PassThroughEndpointLogging = Any
EndpointType = Any
from abc import ABC, abstractmethod
class BasePassthroughLoggingHandler(ABC):
@property
@abstractmethod
def llm_provider_name(self) -> LlmProviders:
pass
@abstractmethod
def get_provider_config(self, model: str) -> BaseConfig:
pass
def passthrough_chat_handler(
self,
httpx_response: httpx.Response,
response_body: dict,
logging_obj: LiteLLMLoggingObj,
url_route: str,
result: str,
start_time: datetime,
end_time: datetime,
cache_hit: bool,
request_body: dict,
**kwargs,
) -> PassThroughEndpointLoggingTypedDict:
"""
Transforms LLM response to OpenAI response, generates a standard logging object so downstream logging can be handled
"""
model = request_body.get("model", response_body.get("model", ""))
provider_config = self.get_provider_config(model=model)
litellm_model_response: ModelResponse = provider_config.transform_response(
raw_response=httpx_response,
model_response=litellm.ModelResponse(),
model=model,
messages=[],
logging_obj=logging_obj,
optional_params={},
api_key="",
request_data={},
encoding=litellm.encoding,
json_mode=False,
litellm_params={},
)
kwargs = self._create_response_logging_payload(
litellm_model_response=litellm_model_response,
model=model,
kwargs=kwargs,
start_time=start_time,
end_time=end_time,
logging_obj=logging_obj,
)
return {
"result": litellm_model_response,
"kwargs": kwargs,
}
def _get_user_from_metadata(
self,
passthrough_logging_payload: PassthroughStandardLoggingPayload,
) -> Optional[str]:
request_body = passthrough_logging_payload.get("request_body")
if request_body:
return get_end_user_id_from_request_body(request_body)
return None
def _create_response_logging_payload(
self,
litellm_model_response: Union[ModelResponse, TextCompletionResponse],
model: str,
kwargs: dict,
start_time: datetime,
end_time: datetime,
logging_obj: LiteLLMLoggingObj,
) -> dict:
"""
Create the standard logging object for Generic LLM passthrough
handles streaming and non-streaming responses
"""
try:
response_cost = litellm.completion_cost(
completion_response=litellm_model_response,
model=model,
)
kwargs["response_cost"] = response_cost
kwargs["model"] = model
passthrough_logging_payload: Optional[PassthroughStandardLoggingPayload] = ( # type: ignore
kwargs.get("passthrough_logging_payload")
)
if passthrough_logging_payload:
user = self._get_user_from_metadata(
passthrough_logging_payload=passthrough_logging_payload,
)
if user:
kwargs.setdefault("litellm_params", {})
kwargs["litellm_params"].update(
{"proxy_server_request": {"body": {"user": user}}}
)
# Make standard logging object for the passthrough provider
standard_logging_object = get_standard_logging_object_payload(
kwargs=kwargs,
init_response_obj=litellm_model_response,
start_time=start_time,
end_time=end_time,
logging_obj=logging_obj,
status="success",
)
# pretty print standard logging object
verbose_proxy_logger.debug(
"standard_logging_object= %s",
json.dumps(standard_logging_object, indent=4),
)
kwargs["standard_logging_object"] = standard_logging_object
# set litellm_call_id to logging response object
litellm_model_response.id = logging_obj.litellm_call_id
litellm_model_response.model = model
logging_obj.model_call_details["model"] = model
return kwargs
except Exception as e:
verbose_proxy_logger.exception(
"Error creating LLM passthrough response logging payload: %s", e
)
return kwargs
@abstractmethod
def _build_complete_streaming_response(
self,
all_chunks: List[str],
litellm_logging_obj: LiteLLMLoggingObj,
model: str,
) -> Optional[Union[ModelResponse, TextCompletionResponse]]:
"""
Builds complete response from raw chunks
- Converts str chunks to generic chunks
- Converts generic chunks to litellm chunks (OpenAI format)
- Builds complete response from litellm chunks
"""
pass
def _handle_logging_llm_collected_chunks(
self,
litellm_logging_obj: LiteLLMLoggingObj,
passthrough_success_handler_obj: PassThroughEndpointLogging,
url_route: str,
request_body: dict,
endpoint_type: EndpointType,
start_time: datetime,
all_chunks: List[str],
end_time: datetime,
) -> PassThroughEndpointLoggingTypedDict:
"""
Takes raw chunks from an LLM passthrough endpoint and logs them in litellm callbacks
- Builds complete response from chunks
- Creates standard logging object
- Logs in litellm callbacks
"""
model = request_body.get("model", "")
complete_streaming_response = self._build_complete_streaming_response(
all_chunks=all_chunks,
litellm_logging_obj=litellm_logging_obj,
model=model,
)
if complete_streaming_response is None:
verbose_proxy_logger.error(
"Unable to build complete streaming response for Anthropic passthrough endpoint, not logging..."
)
return {
"result": None,
"kwargs": {},
}
kwargs = self._create_response_logging_payload(
litellm_model_response=complete_streaming_response,
model=model,
kwargs={},
start_time=start_time,
end_time=end_time,
logging_obj=litellm_logging_obj,
)
return {
"result": complete_streaming_response,
"kwargs": kwargs,
}

View file

@ -0,0 +1,56 @@
from typing import List, Optional, Union
from litellm import stream_chunk_builder
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper
from litellm.llms.base_llm.chat.transformation import BaseConfig
from litellm.llms.cohere.chat.v2_transformation import CohereV2ChatConfig
from litellm.llms.cohere.common_utils import (
ModelResponseIterator as CohereModelResponseIterator,
)
from litellm.types.utils import LlmProviders, ModelResponse, TextCompletionResponse
from .base_passthrough_logging_handler import BasePassthroughLoggingHandler
class CoherePassthroughLoggingHandler(BasePassthroughLoggingHandler):
@property
def llm_provider_name(self) -> LlmProviders:
return LlmProviders.COHERE
def get_provider_config(self, model: str) -> BaseConfig:
return CohereV2ChatConfig()
def _build_complete_streaming_response(
self,
all_chunks: List[str],
litellm_logging_obj: LiteLLMLoggingObj,
model: str,
) -> Optional[Union[ModelResponse, TextCompletionResponse]]:
cohere_model_response_iterator = CohereModelResponseIterator(
streaming_response=None,
sync_stream=False,
)
litellm_custom_stream_wrapper = CustomStreamWrapper(
completion_stream=cohere_model_response_iterator,
model=model,
logging_obj=litellm_logging_obj,
custom_llm_provider="cohere",
)
all_openai_chunks = []
for _chunk_str in all_chunks:
try:
generic_chunk = (
cohere_model_response_iterator.convert_str_chunk_to_generic_chunk(
chunk=_chunk_str
)
)
litellm_chunk = litellm_custom_stream_wrapper.chunk_creator(
chunk=generic_chunk
)
if litellm_chunk is not None:
all_openai_chunks.append(litellm_chunk)
except (StopIteration, StopAsyncIteration):
break
complete_streaming_response = stream_chunk_builder(chunks=all_openai_chunks)
return complete_streaming_response
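A minimal usage sketch (not part of this commit) of how the proxy's streaming success path might drive this handler once a Cohere /v2/chat passthrough stream finishes. Only the method signature comes from the base class above; the chunk list, logging objects, and endpoint-type value are assumptions for illustration.
from datetime import datetime

cohere_handler = CoherePassthroughLoggingHandler()

# raw_sse_chunks, litellm_logging_obj, pass_through_endpoint_logging and endpoint_type
# are assumed to be in scope (collected/constructed by the pass-through endpoint).
logging_result = cohere_handler._handle_logging_llm_collected_chunks(
    litellm_logging_obj=litellm_logging_obj,
    passthrough_success_handler_obj=pass_through_endpoint_logging,
    url_route="/v2/chat",
    request_body={"model": "command-r-plus", "messages": []},  # illustrative body
    endpoint_type=endpoint_type,  # EndpointType member for Cohere; exact name assumed
    start_time=datetime.now(),
    all_chunks=raw_sse_chunks,
    end_time=datetime.now(),
)
# logging_result["result"] is the rebuilt ModelResponse (or None on failure);
# logging_result["kwargs"] carries the standard logging payload used by litellm callbacks.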

View file

@@ -683,6 +683,7 @@ async def pass_through_request( # noqa: PLR0915
end_time=end_time,
logging_obj=logging_obj,
cache_hit=False,
request_body=_parsed_body,
**kwargs,
)
)

View file

@@ -16,10 +16,15 @@ from .llm_provider_handlers.anthropic_passthrough_logging_handler import (
from .llm_provider_handlers.assembly_passthrough_logging_handler import (
AssemblyAIPassthroughLoggingHandler,
)
from .llm_provider_handlers.cohere_passthrough_logging_handler import (
CoherePassthroughLoggingHandler,
)
from .llm_provider_handlers.vertex_passthrough_logging_handler import (
VertexPassthroughLoggingHandler,
)
cohere_passthrough_logging_handler = CoherePassthroughLoggingHandler()
class PassThroughEndpointLogging:
def __init__(self):
@@ -32,6 +37,8 @@ class PassThroughEndpointLogging:
# Anthropic
self.TRACKED_ANTHROPIC_ROUTES = ["/messages"]
# Cohere
self.TRACKED_COHERE_ROUTES = ["/v2/chat"]
self.assemblyai_passthrough_logging_handler = (
AssemblyAIPassthroughLoggingHandler()
)
@@ -84,6 +91,7 @@ class PassThroughEndpointLogging:
start_time: datetime,
end_time: datetime,
cache_hit: bool,
request_body: dict,
**kwargs,
):
standard_logging_response_object: Optional[
@@ -125,6 +133,25 @@ class PassThroughEndpointLogging:
anthropic_passthrough_logging_handler_result["result"]
)
kwargs = anthropic_passthrough_logging_handler_result["kwargs"]
elif self.is_cohere_route(url_route):
cohere_passthrough_logging_handler_result = (
cohere_passthrough_logging_handler.passthrough_chat_handler(
httpx_response=httpx_response,
response_body=response_body or {},
logging_obj=logging_obj,
url_route=url_route,
result=result,
start_time=start_time,
end_time=end_time,
cache_hit=cache_hit,
request_body=request_body,
**kwargs,
)
)
standard_logging_response_object = (
cohere_passthrough_logging_handler_result["result"]
)
kwargs = cohere_passthrough_logging_handler_result["kwargs"]
elif self.is_assemblyai_route(url_route):
if (
AssemblyAIPassthroughLoggingHandler._should_log_request(
@@ -173,6 +200,11 @@ class PassThroughEndpointLogging:
return True
return False
def is_cohere_route(self, url_route: str):
for route in self.TRACKED_COHERE_ROUTES:
if route in url_route:
return True
return False
def is_assemblyai_route(self, url_route: str):
parsed_url = urlparse(url_route)
if parsed_url.hostname == "api.assemblyai.com":

View file

@@ -803,9 +803,9 @@ model_max_budget_limiter = _PROXY_VirtualKeyModelMaxBudgetLimiter(
dual_cache=user_api_key_cache
)
litellm.logging_callback_manager.add_litellm_callback(model_max_budget_limiter)
redis_usage_cache: Optional[RedisCache] = (
None # redis cache used for tracking spend, tpm/rpm limits
)
redis_usage_cache: Optional[
RedisCache
] = None # redis cache used for tracking spend, tpm/rpm limits
user_custom_auth = None
user_custom_key_generate = None
user_custom_sso = None
@@ -1131,9 +1131,9 @@ async def update_cache( # noqa: PLR0915
_id = "team_id:{}".format(team_id)
try:
# Fetch the existing cost for the given user
existing_spend_obj: Optional[LiteLLM_TeamTable] = (
await user_api_key_cache.async_get_cache(key=_id)
)
existing_spend_obj: Optional[
LiteLLM_TeamTable
] = await user_api_key_cache.async_get_cache(key=_id)
if existing_spend_obj is None:
# do nothing if team not in api key cache
return
@@ -2812,9 +2812,9 @@ async def initialize( # noqa: PLR0915
user_api_base = api_base
dynamic_config[user_model]["api_base"] = api_base
if api_version:
os.environ["AZURE_API_VERSION"] = (
api_version # set this for azure - litellm can read this from the env
)
os.environ[
"AZURE_API_VERSION"
] = api_version # set this for azure - litellm can read this from the env
if max_tokens: # model-specific param
dynamic_config[user_model]["max_tokens"] = max_tokens
if temperature: # model-specific param
@@ -3316,6 +3316,7 @@ async def model_list(
user_model=user_model,
infer_model_from_keys=general_settings.get("infer_model_from_keys", False),
return_wildcard_routes=return_wildcard_routes,
llm_router=llm_router,
)
return dict(
@@ -7758,9 +7759,9 @@ async def get_config_list(
hasattr(sub_field_info, "description")
and sub_field_info.description is not None
):
nested_fields[idx].field_description = (
sub_field_info.description
)
nested_fields[
idx
].field_description = sub_field_info.description
idx += 1
_stored_in_db = None

View file

@@ -0,0 +1,17 @@
from typing import Literal, Optional, TypedDict, Union
from litellm.types.llms.openai import ChatCompletionCachedContent
class CacheControlMessageInjectionPoint(TypedDict):
"""Type for message-level injection points."""
location: Literal["message"]
role: Optional[
Literal["user", "system", "assistant"]
] # Optional: target by role (user, system, assistant)
index: Optional[Union[int, str]] # Optional: target by specific index
control: Optional[ChatCompletionCachedContent]
CacheControlInjectionPoint = CacheControlMessageInjectionPoint
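As a hedged usage sketch (mirroring the tests added later in this commit, not a new API), a caller can register the Anthropic cache-control hook and pass message-level injection points; the model name is illustrative.
import litellm
from litellm.integrations.anthropic_cache_control_hook import AnthropicCacheControlHook

litellm.callbacks = [AnthropicCacheControlHook()]

response = litellm.completion(
    model="bedrock/anthropic.claude-3-5-haiku-20241022-v1:0",
    messages=[
        {"role": "system", "content": "You are an AI assistant analyzing legal documents."},
        {"role": "user", "content": "What are the key terms and conditions in this agreement?"},
    ],
    # inject a cache-control checkpoint into every system message;
    # use {"location": "message", "index": 1} to target one message by position instead
    cache_control_injection_points=[
        {"location": "message", "role": "system"},
    ],
)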

View file

@@ -44,3 +44,59 @@ class ChatHistoryChatBot(TypedDict, total=False):
ChatHistory = List[
Union[ChatHistorySystem, ChatHistoryChatBot, ChatHistoryUser, ChatHistoryToolResult]
]
class CohereV2ChatResponseMessageToolCallFunction(TypedDict, total=False):
name: str
parameters: dict
class CohereV2ChatResponseMessageToolCall(TypedDict):
id: str
type: Literal["function"]
function: CohereV2ChatResponseMessageToolCallFunction
class CohereV2ChatResponseMessageContent(TypedDict):
id: str
type: Literal["tool"]
tool: str
class CohereV2ChatResponseMessage(TypedDict, total=False):
role: Required[Literal["assistant"]]
tool_calls: List[CohereV2ChatResponseMessageToolCall]
tool_plan: str
content: List[CohereV2ChatResponseMessageContent]
citations: List[dict]
class CohereV2ChatResponseUsageBilledUnits(TypedDict, total=False):
input_tokens: int
output_tokens: int
search_units: int
classifications: int
class CohereV2ChatResponseUsageTokens(TypedDict, total=False):
input_tokens: int
output_tokens: int
class CohereV2ChatResponseUsage(TypedDict, total=False):
billed_units: CohereV2ChatResponseUsageBilledUnits
tokens: CohereV2ChatResponseUsageTokens
class CohereV2ChatResponseLogProbs(TypedDict, total=False):
token_ids: Required[List[int]]
text: str
logprobs: List[float]
class CohereV2ChatResponse(TypedDict):
id: str
finish_reason: str
message: CohereV2ChatResponseMessage
usage: CohereV2ChatResponseUsage
logprobs: CohereV2ChatResponseLogProbs
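For orientation, a hedged example payload that satisfies these TypedDicts; the values are illustrative and not taken from Cohere's API documentation.
example_response: CohereV2ChatResponse = {
    "id": "resp_123",
    "finish_reason": "COMPLETE",
    "message": {
        "role": "assistant",
        "tool_plan": "I will call the weather tool.",
        "tool_calls": [
            {
                "id": "call_1",
                "type": "function",
                "function": {"name": "get_weather", "parameters": {"city": "Paris"}},
            }
        ],
    },
    "usage": {
        "billed_units": {"input_tokens": 12, "output_tokens": 34},
        "tokens": {"input_tokens": 15, "output_tokens": 40},
    },
    "logprobs": {"token_ids": [101, 102, 103]},
}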

View file

@@ -516,9 +516,9 @@ def function_setup( # noqa: PLR0915
function_id: Optional[str] = kwargs["id"] if "id" in kwargs else None
## DYNAMIC CALLBACKS ##
dynamic_callbacks: Optional[List[Union[str, Callable, CustomLogger]]] = (
kwargs.pop("callbacks", None)
)
dynamic_callbacks: Optional[
List[Union[str, Callable, CustomLogger]]
] = kwargs.pop("callbacks", None)
all_callbacks = get_dynamic_callbacks(dynamic_callbacks=dynamic_callbacks)
if len(all_callbacks) > 0:
@@ -1202,9 +1202,9 @@ def client(original_function): # noqa: PLR0915
exception=e,
retry_policy=kwargs.get("retry_policy"),
)
kwargs["retry_policy"] = (
reset_retry_policy()
) # prevent infinite loops
kwargs[
"retry_policy"
] = reset_retry_policy() # prevent infinite loops
litellm.num_retries = (
None # set retries to None to prevent infinite loops
)
@@ -3013,16 +3013,16 @@ def get_optional_params( # noqa: PLR0915
True # so that main.py adds the function call to the prompt
)
if "tools" in non_default_params:
optional_params["functions_unsupported_model"] = (
non_default_params.pop("tools")
)
optional_params[
"functions_unsupported_model"
] = non_default_params.pop("tools")
non_default_params.pop(
"tool_choice", None
) # causes ollama requests to hang
elif "functions" in non_default_params:
optional_params["functions_unsupported_model"] = (
non_default_params.pop("functions")
)
optional_params[
"functions_unsupported_model"
] = non_default_params.pop("functions")
elif (
litellm.add_function_to_prompt
): # if user opts to add it to prompt instead
@@ -3045,10 +3045,10 @@ def get_optional_params( # noqa: PLR0915
if "response_format" in non_default_params:
if provider_config is not None:
non_default_params["response_format"] = (
provider_config.get_json_schema_from_pydantic_object(
response_format=non_default_params["response_format"]
)
non_default_params[
"response_format"
] = provider_config.get_json_schema_from_pydantic_object(
response_format=non_default_params["response_format"]
)
else:
non_default_params["response_format"] = type_to_response_format_param(
@@ -4064,9 +4064,9 @@ def _count_characters(text: str) -> int:
def get_response_string(response_obj: Union[ModelResponse, ModelResponseStream]) -> str:
_choices: Union[List[Union[Choices, StreamingChoices]], List[StreamingChoices]] = (
response_obj.choices
)
_choices: Union[
List[Union[Choices, StreamingChoices]], List[StreamingChoices]
] = response_obj.choices
response_str = ""
for choice in _choices:
@@ -4458,14 +4458,14 @@ def _get_model_info_helper( # noqa: PLR0915
if combined_model_name in litellm.model_cost:
key = combined_model_name
_model_info = _get_model_info_from_model_cost(key=key)
_model_info = _get_model_info_from_model_cost(key=cast(str, key))
if not _check_provider_match(
model_info=_model_info, custom_llm_provider=custom_llm_provider
):
_model_info = None
if _model_info is None and model in litellm.model_cost:
key = model
_model_info = _get_model_info_from_model_cost(key=key)
_model_info = _get_model_info_from_model_cost(key=cast(str, key))
if not _check_provider_match(
model_info=_model_info, custom_llm_provider=custom_llm_provider
):
@@ -4475,21 +4475,21 @@ def _get_model_info_helper( # noqa: PLR0915
and combined_stripped_model_name in litellm.model_cost
):
key = combined_stripped_model_name
_model_info = _get_model_info_from_model_cost(key=key)
_model_info = _get_model_info_from_model_cost(key=cast(str, key))
if not _check_provider_match(
model_info=_model_info, custom_llm_provider=custom_llm_provider
):
_model_info = None
if _model_info is None and stripped_model_name in litellm.model_cost:
key = stripped_model_name
_model_info = _get_model_info_from_model_cost(key=key)
_model_info = _get_model_info_from_model_cost(key=cast(str, key))
if not _check_provider_match(
model_info=_model_info, custom_llm_provider=custom_llm_provider
):
_model_info = None
if _model_info is None and split_model in litellm.model_cost:
key = split_model
_model_info = _get_model_info_from_model_cost(key=key)
_model_info = _get_model_info_from_model_cost(key=cast(str, key))
if not _check_provider_match(
model_info=_model_info, custom_llm_provider=custom_llm_provider
):
@@ -5807,8 +5807,133 @@ def trim_messages(
return messages
from litellm.caching.in_memory_cache import InMemoryCache
class AvailableModelsCache(InMemoryCache):
def __init__(self, ttl_seconds: int = 300, max_size: int = 1000):
super().__init__(ttl_seconds, max_size)
self._env_hash: Optional[str] = None
def _get_env_hash(self) -> str:
"""Create a hash of relevant environment variables"""
env_vars = {
k: v
for k, v in os.environ.items()
if k.startswith(("OPENAI", "ANTHROPIC", "AZURE", "AWS"))
}
return str(hash(frozenset(env_vars.items())))
def _check_env_changed(self) -> bool:
"""Check if environment variables have changed"""
current_hash = self._get_env_hash()
if self._env_hash is None:
self._env_hash = current_hash
return True
return current_hash != self._env_hash
def _get_cache_key(
self,
custom_llm_provider: Optional[str],
litellm_params: Optional[LiteLLM_Params],
) -> str:
valid_str = ""
if litellm_params is not None:
valid_str = litellm_params.model_dump_json()
if custom_llm_provider is not None:
valid_str = f"{custom_llm_provider}:{valid_str}"
return hashlib.sha256(valid_str.encode()).hexdigest()
def get_cached_model_info(
self,
custom_llm_provider: Optional[str] = None,
litellm_params: Optional[LiteLLM_Params] = None,
) -> Optional[List[str]]:
"""Get cached model info"""
# Check if environment has changed
if litellm_params is None and self._check_env_changed():
self.cache_dict.clear()
return None
cache_key = self._get_cache_key(custom_llm_provider, litellm_params)
result = cast(Optional[List[str]], self.get_cache(cache_key))
if result is not None:
return copy.deepcopy(result)
return result
def set_cached_model_info(
self,
custom_llm_provider: str,
litellm_params: Optional[LiteLLM_Params],
available_models: List[str],
):
"""Set cached model info"""
cache_key = self._get_cache_key(custom_llm_provider, litellm_params)
self.set_cache(cache_key, copy.deepcopy(available_models))
# Global cache instance
_model_cache = AvailableModelsCache()
def _infer_valid_provider_from_env_vars(
custom_llm_provider: Optional[str] = None,
) -> List[str]:
valid_providers: List[str] = []
environ_keys = os.environ.keys()
for provider in litellm.provider_list:
if custom_llm_provider and provider != custom_llm_provider:
continue
# edge case litellm has together_ai as a provider, it should be togetherai
env_provider_1 = provider.replace("_", "")
env_provider_2 = provider
# litellm standardizes expected provider keys to
# PROVIDER_API_KEY. Example: OPENAI_API_KEY, COHERE_API_KEY
expected_provider_key_1 = f"{env_provider_1.upper()}_API_KEY"
expected_provider_key_2 = f"{env_provider_2.upper()}_API_KEY"
if (
expected_provider_key_1 in environ_keys
or expected_provider_key_2 in environ_keys
):
# key is set
valid_providers.append(provider)
return valid_providers
def _get_valid_models_from_provider_api(
provider_config: BaseLLMModelInfo,
custom_llm_provider: str,
litellm_params: Optional[LiteLLM_Params] = None,
) -> List[str]:
try:
cached_result = _model_cache.get_cached_model_info(
custom_llm_provider, litellm_params
)
if cached_result is not None:
return cached_result
models = provider_config.get_models(
api_key=litellm_params.api_key if litellm_params is not None else None,
api_base=litellm_params.api_base if litellm_params is not None else None,
)
_model_cache.set_cached_model_info(custom_llm_provider, litellm_params, models)
return models
except Exception as e:
verbose_logger.debug(f"Error getting valid models: {e}")
return []
def get_valid_models(
check_provider_endpoint: bool = False, custom_llm_provider: Optional[str] = None
check_provider_endpoint: Optional[bool] = None,
custom_llm_provider: Optional[str] = None,
litellm_params: Optional[LiteLLM_Params] = None,
) -> List[str]:
"""
Returns a list of valid LLMs based on the set environment variables
@@ -5819,31 +5944,21 @@ def get_valid_models(
Returns:
A list of valid LLMs
"""
try:
check_provider_endpoint = (
check_provider_endpoint or litellm.check_provider_endpoint
)
# get keys set in .env
environ_keys = os.environ.keys()
valid_providers = []
valid_providers: List[str] = []
valid_models: List[str] = []
# for all valid providers, make a list of supported llms
valid_models = []
for provider in litellm.provider_list:
if custom_llm_provider and provider != custom_llm_provider:
continue
# edge case litellm has together_ai as a provider, it should be togetherai
env_provider_1 = provider.replace("_", "")
env_provider_2 = provider
# litellm standardizes expected provider keys to
# PROVIDER_API_KEY. Example: OPENAI_API_KEY, COHERE_API_KEY
expected_provider_key_1 = f"{env_provider_1.upper()}_API_KEY"
expected_provider_key_2 = f"{env_provider_2.upper()}_API_KEY"
if (
expected_provider_key_1 in environ_keys
or expected_provider_key_2 in environ_keys
):
# key is set
valid_providers.append(provider)
if custom_llm_provider:
valid_providers = [custom_llm_provider]
else:
valid_providers = _infer_valid_provider_from_env_vars(custom_llm_provider)
for provider in valid_providers:
provider_config = ProviderConfigManager.get_provider_model_info(
@@ -5856,15 +5971,24 @@ def get_valid_models(
if provider == "azure":
valid_models.append("Azure-LLM")
elif provider_config is not None and check_provider_endpoint:
try:
models = provider_config.get_models()
valid_models.extend(models)
except Exception as e:
verbose_logger.debug(f"Error getting valid models: {e}")
elif (
provider_config is not None
and check_provider_endpoint
and provider is not None
):
valid_models.extend(
_get_valid_models_from_provider_api(
provider_config,
provider,
litellm_params,
)
)
else:
models_for_provider = litellm.models_by_provider.get(provider, [])
models_for_provider = copy.deepcopy(
litellm.models_by_provider.get(provider, [])
)
valid_models.extend(models_for_provider)
return valid_models
except Exception as e:
verbose_logger.debug(f"Error getting valid models: {e}")
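A short usage sketch of the extended signature, based on the tests added later in this commit. The placeholder API key is illustrative; provider endpoints are only queried when check_provider_endpoint is true, and results are served from AvailableModelsCache, which is cleared when the relevant provider environment variables change.
from litellm.utils import get_valid_models
from litellm.types.router import CredentialLiteLLMParams

# Static list from litellm.models_by_provider (no network call).
openai_models = get_valid_models(custom_llm_provider="openai")

# Query the provider's model-list endpoint with dynamically supplied credentials.
creds = CredentialLiteLLMParams(api_key="sk-placeholder")  # illustrative key
anthropic_models = get_valid_models(
    custom_llm_provider="anthropic",
    litellm_params=creds,
    check_provider_endpoint=True,
)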
@@ -6510,7 +6634,12 @@ class ProviderConfigManager:
return litellm.AnthropicModelInfo()
elif LlmProviders.XAI == provider:
return litellm.XAIModelInfo()
elif LlmProviders.VLLM == provider:
from litellm.llms.vllm.common_utils import (
VLLMModelInfo, # experimental approach, to reduce bloat on __init__.py
)
return VLLMModelInfo()
return None
@staticmethod

View file

@@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
version = "1.66.0"
version = "1.66.1"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT"
@@ -118,7 +118,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api"
[tool.commitizen]
version = "1.66.0"
version = "1.66.1"
version_files = [
"pyproject.toml:^version"
]

View file

@@ -0,0 +1,169 @@
import datetime
import json
import os
import sys
import unittest
from typing import List, Optional, Tuple
from unittest.mock import ANY, MagicMock, Mock, patch
import httpx
import pytest
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system-path
import litellm
from litellm.integrations.anthropic_cache_control_hook import AnthropicCacheControlHook
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
from litellm.types.llms.openai import AllMessageValues
from litellm.types.utils import StandardCallbackDynamicParams
@pytest.mark.asyncio
async def test_anthropic_cache_control_hook_system_message():
# Use patch.dict to mock environment variables instead of setting them directly
with patch.dict(
os.environ,
{
"AWS_ACCESS_KEY_ID": "fake_access_key_id",
"AWS_SECRET_ACCESS_KEY": "fake_secret_access_key",
"AWS_REGION_NAME": "us-west-2",
},
):
anthropic_cache_control_hook = AnthropicCacheControlHook()
litellm.callbacks = [anthropic_cache_control_hook]
# Mock response data
mock_response = MagicMock()
mock_response.json.return_value = {
"output": {
"message": {
"role": "assistant",
"content": "Here is my analysis of the key terms and conditions...",
}
},
"stopReason": "stop_sequence",
"usage": {
"inputTokens": 100,
"outputTokens": 200,
"totalTokens": 300,
"cacheReadInputTokens": 100,
"cacheWriteInputTokens": 200,
},
}
mock_response.status_code = 200
# Mock AsyncHTTPHandler.post method
client = AsyncHTTPHandler()
with patch.object(client, "post", return_value=mock_response) as mock_post:
response = await litellm.acompletion(
model="bedrock/anthropic.claude-3-5-haiku-20241022-v1:0",
messages=[
{
"role": "system",
"content": [
{
"type": "text",
"text": "You are an AI assistant tasked with analyzing legal documents.",
},
{
"type": "text",
"text": "Here is the full text of a complex legal agreement",
},
],
},
{
"role": "user",
"content": "what are the key terms and conditions in this agreement?",
},
],
cache_control_injection_points=[
{
"location": "message",
"role": "system",
},
],
client=client,
)
mock_post.assert_called_once()
request_body = json.loads(mock_post.call_args.kwargs["data"])
print("request_body: ", json.dumps(request_body, indent=4))
# Verify the request body
assert request_body["system"][1]["cachePoint"] == {"type": "default"}
@pytest.mark.asyncio
async def test_anthropic_cache_control_hook_user_message():
# Use patch.dict to mock environment variables instead of setting them directly
with patch.dict(
os.environ,
{
"AWS_ACCESS_KEY_ID": "fake_access_key_id",
"AWS_SECRET_ACCESS_KEY": "fake_secret_access_key",
"AWS_REGION_NAME": "us-west-2",
},
):
anthropic_cache_control_hook = AnthropicCacheControlHook()
litellm.callbacks = [anthropic_cache_control_hook]
# Mock response data
mock_response = MagicMock()
mock_response.json.return_value = {
"output": {
"message": {
"role": "assistant",
"content": "Here is my analysis of the key terms and conditions...",
}
},
"stopReason": "stop_sequence",
"usage": {
"inputTokens": 100,
"outputTokens": 200,
"totalTokens": 300,
"cacheReadInputTokens": 100,
"cacheWriteInputTokens": 200,
},
}
mock_response.status_code = 200
# Mock AsyncHTTPHandler.post method
client = AsyncHTTPHandler()
with patch.object(client, "post", return_value=mock_response) as mock_post:
response = await litellm.acompletion(
model="bedrock/anthropic.claude-3-5-haiku-20241022-v1:0",
messages=[
{
"role": "assistant",
"content": [
{
"type": "text",
"text": "You are an AI assistant tasked with analyzing legal documents.",
},
],
},
{
"role": "user",
"content": "what are the key terms and conditions in this agreement? <very_long_text>",
},
],
cache_control_injection_points=[
{
"location": "message",
"role": "user",
},
],
client=client,
)
mock_post.assert_called_once()
request_body = json.loads(mock_post.call_args.kwargs["data"])
print("request_body: ", json.dumps(request_body, indent=4))
# Verify the request body
assert request_body["messages"][1]["content"][1]["cachePoint"] == {
"type": "default"
}

View file

@@ -30,7 +30,7 @@ class TestCustomPromptManagement(CustomPromptManagement):
model: str,
messages: List[AllMessageValues],
non_default_params: dict,
prompt_id: str,
prompt_id: Optional[str],
prompt_variables: Optional[dict],
dynamic_callback_params: StandardCallbackDynamicParams,
) -> Tuple[str, List[AllMessageValues], dict]:

View file

@@ -78,6 +78,33 @@ def test_initialize_with_api_key(setup_mocks):
assert result["azure_ad_token"] is None
def test_initialize_with_tenant_credentials_env_var(setup_mocks, monkeypatch):
monkeypatch.setenv("AZURE_TENANT_ID", "test-tenant-id")
monkeypatch.setenv("AZURE_CLIENT_ID", "test-client-id")
monkeypatch.setenv("AZURE_CLIENT_SECRET", "test-client-secret")
result = BaseAzureLLM().initialize_azure_sdk_client(
litellm_params={},
api_key=None,
api_base="https://test.openai.azure.com",
model_name="gpt-4",
api_version=None,
is_async=False,
)
# Verify that get_azure_ad_token_from_entrata_id was called
setup_mocks["entrata_token"].assert_called_once_with(
tenant_id="test-tenant-id",
client_id="test-client-id",
client_secret="test-client-secret",
)
# Verify expected result
assert result["api_key"] is None
assert result["azure_endpoint"] == "https://test.openai.azure.com"
assert "azure_ad_token_provider" in result
def test_initialize_with_tenant_credentials(setup_mocks):
# Test with tenant_id, client_id, and client_secret provided
result = BaseAzureLLM().initialize_azure_sdk_client(
@@ -150,8 +177,12 @@ def test_initialize_with_oidc_token(setup_mocks):
assert result["azure_ad_token"] == "mock-oidc-token"
def test_initialize_with_enable_token_refresh(setup_mocks):
def test_initialize_with_enable_token_refresh(setup_mocks, monkeypatch):
litellm._turn_on_debug()
# Enable token refresh
monkeypatch.delenv("AZURE_CLIENT_ID", raising=False)
monkeypatch.delenv("AZURE_CLIENT_SECRET", raising=False)
monkeypatch.delenv("AZURE_TENANT_ID", raising=False)
setup_mocks["litellm"].enable_azure_ad_token_refresh = True
# Test with token refresh enabled
@@ -171,8 +202,11 @@ def test_initialize_with_enable_token_refresh(setup_mocks):
assert "azure_ad_token_provider" in result
def test_initialize_with_token_refresh_error(setup_mocks):
def test_initialize_with_token_refresh_error(setup_mocks, monkeypatch):
# Enable token refresh but make it raise an error
monkeypatch.delenv("AZURE_CLIENT_ID", raising=False)
monkeypatch.delenv("AZURE_CLIENT_SECRET", raising=False)
monkeypatch.delenv("AZURE_TENANT_ID", raising=False)
setup_mocks["litellm"].enable_azure_ad_token_refresh = True
setup_mocks["token_provider"].side_effect = ValueError("Token provider error")

View file

@@ -41,8 +41,10 @@ from unittest.mock import AsyncMock, MagicMock, patch
# Assuming your trim_messages, shorten_message_to_fit_limit, and get_token_count functions are all in a module named 'message_utils'
@pytest.fixture(autouse=True)
def reset_mock_cache():
from litellm.utils import _model_cache
_model_cache.flush_cache()
# Test 1: Check trimming of normal message
def test_basic_trimming():
messages = [
@@ -1539,6 +1541,7 @@ def test_get_valid_models_fireworks_ai(monkeypatch):
litellm.module_level_client, "get", return_value=mock_response
) as mock_post:
valid_models = get_valid_models(check_provider_endpoint=True)
print("valid_models", valid_models)
mock_post.assert_called_once()
assert (
"fireworks_ai/accounts/fireworks/models/llama-3.1-8b-instruct"
@@ -2122,3 +2125,58 @@ def test_get_provider_audio_transcription_config():
config = ProviderConfigManager.get_provider_audio_transcription_config(
model="whisper-1", provider=provider
)
def test_get_valid_models_from_provider():
"""
Test that get_valid_models returns the correct models for a given provider
"""
from litellm.utils import get_valid_models
valid_models = get_valid_models(custom_llm_provider="openai")
assert len(valid_models) > 0
assert "gpt-4o-mini" in valid_models
print("Valid models: ", valid_models)
valid_models.remove("gpt-4o-mini")
assert "gpt-4o-mini" not in valid_models
valid_models = get_valid_models(custom_llm_provider="openai")
assert len(valid_models) > 0
assert "gpt-4o-mini" in valid_models
def test_get_valid_models_from_provider_cache_invalidation(monkeypatch):
"""
Test that get_valid_models returns the correct models for a given provider
"""
from litellm.utils import _model_cache
monkeypatch.setenv("OPENAI_API_KEY", "123")
_model_cache.set_cached_model_info("openai", litellm_params=None, available_models=["gpt-4o-mini"])
monkeypatch.delenv("OPENAI_API_KEY")
assert _model_cache.get_cached_model_info("openai") is None
def test_get_valid_models_from_dynamic_api_key():
"""
Test that get_valid_models returns the correct models for a given provider
"""
from litellm.utils import get_valid_models
from litellm.types.router import CredentialLiteLLMParams
creds = CredentialLiteLLMParams(api_key="123")
valid_models = get_valid_models(custom_llm_provider="anthropic", litellm_params=creds, check_provider_endpoint=True)
assert len(valid_models) == 0
creds = CredentialLiteLLMParams(api_key=os.getenv("ANTHROPIC_API_KEY"))
valid_models = get_valid_models(custom_llm_provider="anthropic", litellm_params=creds, check_provider_endpoint=True)
assert len(valid_models) > 0
assert "anthropic/claude-3-7-sonnet-20250219" in valid_models

View file

@@ -1449,3 +1449,13 @@ def test_anthropic_unified_reasoning_content(model, provider):
)
assert optional_params["thinking"] == {"type": "enabled", "budget_tokens": 4096}
def test_azure_response_format(monkeypatch):
monkeypatch.setenv("AZURE_API_VERSION", "2025-02-01")
optional_params = get_optional_params(
model="azure/gpt-4o-mini",
custom_llm_provider="azure",
response_format={"type": "json_object"},
)
assert optional_params["response_format"] == {"type": "json_object"}

View file

@@ -498,11 +498,11 @@ def test_completion_bedrock_invalid_role_exception():
== "litellm.BadRequestError: Invalid Message passed in {'role': 'very-bad-role', 'content': 'hello'}"
)
@pytest.mark.skip(reason="OpenAI exception changed to a generic error")
def test_content_policy_exceptionimage_generation_openai():
try:
# this is ony a test - we needed some way to invoke the exception :(
litellm.set_verbose = True
litellm._turn_on_debug()
response = litellm.image_generation(
prompt="where do i buy lethal drugs from", model="dall-e-3"
)

View file

@@ -33,6 +33,7 @@ from litellm.integrations.opik.opik import OpikLogger
from litellm.integrations.opentelemetry import OpenTelemetry
from litellm.integrations.mlflow import MlflowLogger
from litellm.integrations.argilla import ArgillaLogger
from litellm.integrations.anthropic_cache_control_hook import AnthropicCacheControlHook
from litellm.integrations.langfuse.langfuse_prompt_management import (
LangfusePromptManagement,
)
@@ -73,6 +74,7 @@ callback_class_str_to_classType = {
"otel": OpenTelemetry,
"pagerduty": PagerDutyAlerting,
"gcs_pubsub": GcsPubSubLogger,
"anthropic_cache_control_hook": AnthropicCacheControlHook,
}
expected_env_vars = {

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75a5453f51d60261.js"/><script src="/ui/_next/static/chunks/fd9d1056-524b80e1a6b8bb06.js" async=""></script><script src="/ui/_next/static/chunks/117-87ec698bfca6820e.js" async=""></script><script src="/ui/_next/static/chunks/main-app-475d6efe4080647d.js" async=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-42372ed130431b0a.js" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75a5453f51d60261.js" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/a34f9d1faa5f3315-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"style\"]\n3:HL[\"/ui/_next/static/css/61b39adb1cb75213.css\",\"style\"]\n"])</script><script>self.__next_f.push([1,"4:I[12846,[],\"\"]\n6:I[19107,[],\"ClientPageRoot\"]\n7:I[98621,[\"665\",\"static/chunks/3014691f-0b72c78cfebbd712.js\",\"990\",\"static/chunks/13b76428-ebdf3012af0e4489.js\",\"42\",\"static/chunks/42-6810261f4d6c8bbf.js\",\"261\",\"static/chunks/261-d4b99bc9f53d4ef3.js\",\"899\",\"static/chunks/899-8d832fe7c09b2afe.js\",\"817\",\"static/chunks/817-c8c94d51d28af935.js\",\"250\",\"static/chunks/250-f21e8c1de1717077.js\",\"699\",\"static/chunks/699-b82671c2a3a100c7.js\",\"931\",\"static/chunks/app/page-0f39fb1d0bde3c55.js\"],\"default\",1]\n8:I[4707,[],\"\"]\n9:I[36423,[],\"\"]\nb:I[61060,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"0:[\"$\",\"$L4\",null,{\"buildId\":\"eKo9wJb6ZFjAy85GaOeQ6\",\"assetPrefix\":\"/ui\",\"urlParts\":[\"\",\"\"],\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[[\"$L5\",[\"$\",\"$L6\",null,{\"props\":{\"params\":{},\"searchParams\":{}},\"Component\":\"$7\"}],null],null],null]},[[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}],[\"$\",\"link\",\"1\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/61b39adb1cb75213.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}]],[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_cf7686\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI 
Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[]}]}]}]],null],null],\"couldBeIntercepted\":false,\"initialHead\":[null,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script></body></html>
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75a5453f51d60261.js"/><script src="/ui/_next/static/chunks/fd9d1056-524b80e1a6b8bb06.js" async=""></script><script src="/ui/_next/static/chunks/117-87ec698bfca6820e.js" async=""></script><script src="/ui/_next/static/chunks/main-app-475d6efe4080647d.js" async=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-42372ed130431b0a.js" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75a5453f51d60261.js" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/a34f9d1faa5f3315-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"style\"]\n3:HL[\"/ui/_next/static/css/61b39adb1cb75213.css\",\"style\"]\n"])</script><script>self.__next_f.push([1,"4:I[12846,[],\"\"]\n6:I[19107,[],\"ClientPageRoot\"]\n7:I[37185,[\"665\",\"static/chunks/3014691f-0b72c78cfebbd712.js\",\"990\",\"static/chunks/13b76428-ebdf3012af0e4489.js\",\"42\",\"static/chunks/42-6810261f4d6c8bbf.js\",\"261\",\"static/chunks/261-d4b99bc9f53d4ef3.js\",\"899\",\"static/chunks/899-8d832fe7c09b2afe.js\",\"817\",\"static/chunks/817-c8c94d51d28af935.js\",\"250\",\"static/chunks/250-ef859ae76fcc7788.js\",\"699\",\"static/chunks/699-b82671c2a3a100c7.js\",\"931\",\"static/chunks/app/page-0f22a8aad3780430.js\"],\"default\",1]\n8:I[4707,[],\"\"]\n9:I[36423,[],\"\"]\nb:I[61060,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"0:[\"$\",\"$L4\",null,{\"buildId\":\"cxKWUnb1loD53gKRUqiFm\",\"assetPrefix\":\"/ui\",\"urlParts\":[\"\",\"\"],\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[[\"$L5\",[\"$\",\"$L6\",null,{\"props\":{\"params\":{},\"searchParams\":{}},\"Component\":\"$7\"}],null],null],null]},[[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}],[\"$\",\"link\",\"1\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/61b39adb1cb75213.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}]],[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_cf7686\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI 
Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[]}]}]}]],null],null],\"couldBeIntercepted\":false,\"initialHead\":[null,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script></body></html>

View file

@@ -1,7 +1,7 @@
2:I[19107,[],"ClientPageRoot"]
3:I[98621,["665","static/chunks/3014691f-0b72c78cfebbd712.js","990","static/chunks/13b76428-ebdf3012af0e4489.js","42","static/chunks/42-6810261f4d6c8bbf.js","261","static/chunks/261-d4b99bc9f53d4ef3.js","899","static/chunks/899-8d832fe7c09b2afe.js","817","static/chunks/817-c8c94d51d28af935.js","250","static/chunks/250-f21e8c1de1717077.js","699","static/chunks/699-b82671c2a3a100c7.js","931","static/chunks/app/page-0f39fb1d0bde3c55.js"],"default",1]
3:I[37185,["665","static/chunks/3014691f-0b72c78cfebbd712.js","990","static/chunks/13b76428-ebdf3012af0e4489.js","42","static/chunks/42-6810261f4d6c8bbf.js","261","static/chunks/261-d4b99bc9f53d4ef3.js","899","static/chunks/899-8d832fe7c09b2afe.js","817","static/chunks/817-c8c94d51d28af935.js","250","static/chunks/250-ef859ae76fcc7788.js","699","static/chunks/699-b82671c2a3a100c7.js","931","static/chunks/app/page-0f22a8aad3780430.js"],"default",1]
4:I[4707,[],""]
5:I[36423,[],""]
0:["eKo9wJb6ZFjAy85GaOeQ6",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/61b39adb1cb75213.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
0:["cxKWUnb1loD53gKRUqiFm",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/61b39adb1cb75213.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

File diff suppressed because one or more lines are too long

View file

@@ -1,7 +1,7 @@
2:I[19107,[],"ClientPageRoot"]
3:I[52829,["42","static/chunks/42-6810261f4d6c8bbf.js","261","static/chunks/261-d4b99bc9f53d4ef3.js","250","static/chunks/250-f21e8c1de1717077.js","699","static/chunks/699-b82671c2a3a100c7.js","418","static/chunks/app/model_hub/page-068a441595bd0fc3.js"],"default",1]
3:I[52829,["42","static/chunks/42-6810261f4d6c8bbf.js","261","static/chunks/261-d4b99bc9f53d4ef3.js","250","static/chunks/250-ef859ae76fcc7788.js","699","static/chunks/699-b82671c2a3a100c7.js","418","static/chunks/app/model_hub/page-068a441595bd0fc3.js"],"default",1]
4:I[4707,[],""]
5:I[36423,[],""]
0:["eKo9wJb6ZFjAy85GaOeQ6",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/61b39adb1cb75213.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
0:["cxKWUnb1loD53gKRUqiFm",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/61b39adb1cb75213.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

File diff suppressed because one or more lines are too long

View file

@@ -1,7 +1,7 @@
2:I[19107,[],"ClientPageRoot"]
3:I[12011,["665","static/chunks/3014691f-0b72c78cfebbd712.js","42","static/chunks/42-6810261f4d6c8bbf.js","899","static/chunks/899-8d832fe7c09b2afe.js","250","static/chunks/250-f21e8c1de1717077.js","461","static/chunks/app/onboarding/page-957334f0c5d5df41.js"],"default",1]
3:I[12011,["665","static/chunks/3014691f-0b72c78cfebbd712.js","42","static/chunks/42-6810261f4d6c8bbf.js","899","static/chunks/899-8d832fe7c09b2afe.js","250","static/chunks/250-ef859ae76fcc7788.js","461","static/chunks/app/onboarding/page-957334f0c5d5df41.js"],"default",1]
4:I[4707,[],""]
5:I[36423,[],""]
0:["eKo9wJb6ZFjAy85GaOeQ6",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/61b39adb1cb75213.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
0:["cxKWUnb1loD53gKRUqiFm",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/61b39adb1cb75213.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

View file

@@ -5,6 +5,7 @@ import { Row, Col, Typography, Card } from "antd";
import TextArea from "antd/es/input/TextArea";
import { Team } from "../key_team_helpers/key_list";
import TeamDropdown from "../common_components/team_dropdown";
import CacheControlSettings from "./cache_control_settings";
const { Link } = Typography;
interface AdvancedSettingsProps {
@@ -21,6 +22,7 @@ const AdvancedSettings: React.FC<AdvancedSettingsProps> = ({
const [form] = Form.useForm();
const [customPricing, setCustomPricing] = React.useState(false);
const [pricingModel, setPricingModel] = React.useState<'per_token' | 'per_second'>('per_token');
const [showCacheControl, setShowCacheControl] = React.useState(false);
// Add validation function for numbers
const validateNumber = (_: any, value: string) => {
@ -83,6 +85,24 @@ const AdvancedSettings: React.FC<AdvancedSettingsProps> = ({
}
};
const handleCacheControlChange = (checked: boolean) => {
setShowCacheControl(checked);
if (!checked) {
const currentParams = form.getFieldValue('litellm_extra_params');
try {
let paramsObj = currentParams ? JSON.parse(currentParams) : {};
delete paramsObj.cache_control_injection_points;
if (Object.keys(paramsObj).length > 0) {
form.setFieldValue('litellm_extra_params', JSON.stringify(paramsObj, null, 2));
} else {
form.setFieldValue('litellm_extra_params', '');
}
} catch (error) {
form.setFieldValue('litellm_extra_params', '');
}
}
};
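// Illustrative sketch (editor's note, not part of this change): if litellm_extra_params currently holds
//   { "cache_control_injection_points": [{ "location": "message", "role": "system" }], "some_other_param": true }
// (where "some_other_param" stands in for any other hypothetical key), toggling the switch off rewrites the
// field to { "some_other_param": true }, and clears it to an empty string when no other keys remain.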
return (
<>
<Accordion className="mt-2 mb-4">
@ -150,6 +170,12 @@ const AdvancedSettings: React.FC<AdvancedSettingsProps> = ({
</div>
)}
<CacheControlSettings
form={form}
showCacheControl={showCacheControl}
onCacheControlChange={handleCacheControlChange}
/>
<Form.Item
label="Use in pass through routes"
name="use_in_pass_through"

View file

@ -0,0 +1,159 @@
import React from "react";
import { Form, Switch, Select, Input, Typography } from "antd";
import { PlusOutlined, MinusCircleOutlined } from '@ant-design/icons';
import NumericalInput from "../shared/numerical_input";
const { Text } = Typography;
interface CacheControlInjectionPoint {
location: "message";
role?: "user" | "system" | "assistant";
index?: number;
}
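// Illustrative sketch (editor's note, not part of this file): example values matching the interface above.
// A role-based point applies to every message of that role; an index-based point targets a single message.
const examplePoints: CacheControlInjectionPoint[] = [
  { location: "message", role: "system" }, // cache all system messages
  { location: "message", index: 0 },       // cache only the first message in the request
];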
interface CacheControlSettingsProps {
form: any; // Form instance from parent
showCacheControl: boolean;
onCacheControlChange: (checked: boolean) => void;
}
const CacheControlSettings: React.FC<CacheControlSettingsProps> = ({
form,
showCacheControl,
onCacheControlChange,
}) => {
const updateCacheControlPoints = (injectionPoints: CacheControlInjectionPoint[]) => {
const currentParams = form.getFieldValue('litellm_extra_params');
try {
let paramsObj = currentParams ? JSON.parse(currentParams) : {};
if (injectionPoints.length > 0) {
paramsObj.cache_control_injection_points = injectionPoints;
} else {
delete paramsObj.cache_control_injection_points;
}
if (Object.keys(paramsObj).length > 0) {
form.setFieldValue('litellm_extra_params', JSON.stringify(paramsObj, null, 2));
} else {
form.setFieldValue('litellm_extra_params', '');
}
} catch (error) {
console.error('Error updating cache control points:', error);
}
};
return (
<>
<Form.Item
label="Cache Control"
name="cache_control"
valuePropName="checked"
className="mb-4"
tooltip="Tell litellm where to inject cache control checkpoints. You can specify either by role (to apply to all messages of that role) or by specific message index."
>
<Switch onChange={onCacheControlChange} className="bg-gray-600" />
</Form.Item>
{showCacheControl && (
<div className="ml-6 pl-4 border-l-2 border-gray-200">
<Text className="text-sm text-gray-500 block mb-4">
Specify either a role (to cache all messages of that role) or a specific message index.
If both are provided, the index takes precedence.
</Text>
<Form.List
name="cache_control_injection_points"
initialValue={[{ location: "message" }]}
>
{(fields, { add, remove }) => (
<>
{fields.map((field, index) => (
<div key={field.key} className="flex items-center mb-4 gap-4">
<Form.Item
{...field}
label="Type"
name={[field.name, 'location']}
initialValue="message"
className="mb-0"
style={{ width: '180px' }}
>
<Select disabled options={[{ value: 'message', label: 'Message' }]} />
</Form.Item>
<Form.Item
{...field}
label="Role"
name={[field.name, 'role']}
className="mb-0"
style={{ width: '180px' }}
tooltip="Select a role to cache all messages of this type"
>
<Select
placeholder="Select a role"
allowClear
options={[
{ value: 'user', label: 'User' },
{ value: 'system', label: 'System' },
{ value: 'assistant', label: 'Assistant' },
]}
onChange={() => {
const values = form.getFieldValue('cache_control_injection_points');
updateCacheControlPoints(values);
}}
/>
</Form.Item>
<Form.Item
{...field}
label="Index"
name={[field.name, 'index']}
className="mb-0"
style={{ width: '180px' }}
tooltip="Specify a specific message index (optional)"
>
<NumericalInput
type="number"
placeholder="Optional"
step={1}
min={0}
onChange={() => {
const values = form.getFieldValue('cache_control_injection_points');
updateCacheControlPoints(values);
}}
/>
</Form.Item>
{fields.length > 1 && (
<MinusCircleOutlined
className="text-red-500 cursor-pointer text-lg mt-8"
onClick={() => {
remove(field.name);
setTimeout(() => {
const values = form.getFieldValue('cache_control_injection_points');
updateCacheControlPoints(values);
}, 0);
}}
/>
)}
</div>
))}
<Form.Item>
<button
type="button"
className="flex items-center justify-center w-full border border-dashed border-gray-300 py-2 px-4 text-gray-600 hover:text-blue-600 hover:border-blue-300 transition-all rounded"
onClick={() => add()}
>
<PlusOutlined className="mr-2" />
Add Injection Point
</button>
</Form.Item>
</>
)}
</Form.List>
</div>
)}
</>
);
};
export default CacheControlSettings;
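A minimal sketch (editor's note, not taken from this commit; assumes one role-based and one index-based point) of the object this component serializes into litellm_extra_params:

const exampleExtraParams = {
  cache_control_injection_points: [
    { location: "message", role: "system" }, // cache every system message
    { location: "message", index: 0 },       // cache the first message in the request
  ],
};
// updateCacheControlPoints stores JSON.stringify(exampleExtraParams, null, 2) in the form field,
// and removes the key again (or empties the field) when no injection points remain.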

View file

@ -60,7 +60,7 @@ export const prepareModelAddRequest = async (
continue;
}
// Skip the custom_pricing, pricing_model and cache_control fields as they're only used for UI control
if (key === 'custom_pricing' || key === 'pricing_model') {
if (key === 'custom_pricing' || key === 'pricing_model' || key === 'cache_control') {
continue;
}
if (key == "model_name") {

View file

@ -23,6 +23,7 @@ import { getProviderLogoAndName } from "./provider_info_helpers";
import { getDisplayModelName } from "./view_model/model_name_display";
import AddCredentialsModal from "./model_add/add_credentials_tab";
import ReuseCredentialsModal from "./model_add/reuse_credentials";
import CacheControlSettings from "./add_model/cache_control_settings";
interface ModelInfoViewProps {
modelId: string;
@ -57,6 +58,7 @@ export default function ModelInfoView({
const [isSaving, setIsSaving] = useState(false);
const [isEditing, setIsEditing] = useState(false);
const [existingCredential, setExistingCredential] = useState<CredentialItem | null>(null);
const [showCacheControl, setShowCacheControl] = useState(false);
const canEditModel = userRole === "Admin" || modelData.model_info.created_by === userID;
const isAdmin = userRole === "Admin";
@ -86,6 +88,11 @@ export default function ModelInfoView({
console.log("modelInfoResponse, ", modelInfoResponse);
let specificModelData = modelInfoResponse.data[0];
setLocalModelData(specificModelData);
// Check if cache control is enabled
if (specificModelData?.litellm_params?.cache_control_injection_points) {
setShowCacheControl(true);
}
}
getExistingCredential();
getModelInfo();
@ -112,22 +119,31 @@ export default function ModelInfoView({
if (!accessToken) return;
setIsSaving(true);
let updatedLitellmParams = {
...localModelData.litellm_params,
model: values.litellm_model_name,
api_base: values.api_base,
custom_llm_provider: values.custom_llm_provider,
organization: values.organization,
tpm: values.tpm,
rpm: values.rpm,
max_retries: values.max_retries,
timeout: values.timeout,
stream_timeout: values.stream_timeout,
input_cost_per_token: values.input_cost / 1_000_000,
output_cost_per_token: values.output_cost / 1_000_000,
};
// Handle cache control settings
if (values.cache_control && values.cache_control_injection_points?.length > 0) {
updatedLitellmParams.cache_control_injection_points = values.cache_control_injection_points;
} else {
delete updatedLitellmParams.cache_control_injection_points;
}
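// Illustrative sketch (editor's note, not part of this change): with
//   values = { cache_control: true, cache_control_injection_points: [{ location: "message", role: "user" }] }
// the block above copies the list onto updatedLitellmParams.cache_control_injection_points;
// with cache_control unchecked (or an empty list) the key is deleted so it is not sent in the update payload.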
const updateData = {
model_name: values.model_name,
litellm_params: {
...localModelData.litellm_params,
model: values.litellm_model_name,
api_base: values.api_base,
custom_llm_provider: values.custom_llm_provider,
organization: values.organization,
tpm: values.tpm,
rpm: values.rpm,
max_retries: values.max_retries,
timeout: values.timeout,
stream_timeout: values.stream_timeout,
input_cost_per_token: values.input_cost / 1_000_000,
output_cost_per_token: values.output_cost / 1_000_000,
},
litellm_params: updatedLitellmParams,
model_info: {
id: modelId,
}
@ -139,7 +155,7 @@ export default function ModelInfoView({
...localModelData,
model_name: values.model_name,
litellm_model_name: values.litellm_model_name,
litellm_params: updateData.litellm_params
litellm_params: updatedLitellmParams
};
setLocalModelData(updatedModelData);
@ -337,6 +353,8 @@ export default function ModelInfoView({
(localModelData.litellm_params.input_cost_per_token * 1_000_000) : localModelData.model_info?.input_cost_per_token * 1_000_000 || null,
output_cost: localModelData.litellm_params?.output_cost_per_token ?
(localModelData.litellm_params.output_cost_per_token * 1_000_000) : localModelData.model_info?.output_cost_per_token * 1_000_000 || null,
cache_control: localModelData.litellm_params?.cache_control_injection_points ? true : false,
cache_control_injection_points: localModelData.litellm_params?.cache_control_injection_points || [],
}}
layout="vertical"
onValuesChange={() => setIsDirty(true)}
@ -499,6 +517,37 @@ export default function ModelInfoView({
)}
</div>
{/* Cache Control Section */}
{isEditing ? (
<CacheControlSettings
form={form}
showCacheControl={showCacheControl}
onCacheControlChange={(checked) => setShowCacheControl(checked)}
/>
) : (
<div>
<Text className="font-medium">Cache Control</Text>
<div className="mt-1 p-2 bg-gray-50 rounded">
{localModelData.litellm_params?.cache_control_injection_points ? (
<div>
<p>Enabled</p>
<div className="mt-2">
{localModelData.litellm_params.cache_control_injection_points.map((point: any, i: number) => (
<div key={i} className="text-sm text-gray-600 mb-1">
Location: {point.location}
{point.role && <span>, Role: {point.role}</span>}
{point.index !== undefined && <span>, Index: {point.index}</span>}
</div>
))}
</div>
</div>
) : (
"Disabled"
)}
</div>
</div>
)}
<div>
<Text className="font-medium">Team ID</Text>
<div className="mt-1 p-2 bg-gray-50 rounded">