Merge branch 'BerriAI:main' into NolanTrem/GeminiSystemMessages
This commit is contained in: commit adc4f161da
74 changed files with 2462 additions and 538 deletions
.gitignore (vendored, 1 changed line)

@@ -73,6 +73,7 @@ tests/local_testing/log.txt
.codegpt
litellm/proxy/_new_new_secret_config.yaml
litellm/proxy/custom_guardrail.py
.mypy_cache/*
litellm/proxy/_experimental/out/404.html
litellm/proxy/_experimental/out/404.html
litellm/proxy/_experimental/out/model_hub.html
@@ -4,7 +4,7 @@ Pass-through endpoints for Cohere - call provider-specific endpoint, in native f

| Feature | Supported | Notes |
|-------|-------|-------|
| Cost Tracking | ❌ | [Tell us if you need this](https://github.com/BerriAI/litellm/issues/new) |
| Cost Tracking | ✅ | works across all integrations |
| Logging | ✅ | works across all integrations |
| End-user Tracking | ❌ | [Tell us if you need this](https://github.com/BerriAI/litellm/issues/new) |
| Streaming | ✅ | |
@@ -323,6 +323,9 @@ router_settings:
| AZURE_AUTHORITY_HOST | Azure authority host URL
| AZURE_CLIENT_ID | Client ID for Azure services
| AZURE_CLIENT_SECRET | Client secret for Azure services
| AZURE_TENANT_ID | Tenant ID for Azure Active Directory
| AZURE_USERNAME | Username for Azure services, use in conjunction with AZURE_PASSWORD for azure ad token with basic username/password workflow
| AZURE_PASSWORD | Password for Azure services, use in conjunction with AZURE_USERNAME for azure ad token with basic username/password workflow
| AZURE_FEDERATED_TOKEN_FILE | File path to Azure federated token
| AZURE_KEY_VAULT_URI | URI for Azure Key Vault
| AZURE_STORAGE_ACCOUNT_KEY | The Azure Storage Account Key to use for Authentication to Azure Blob Storage logging

@@ -331,7 +334,7 @@ router_settings:
| AZURE_STORAGE_TENANT_ID | The Application Tenant ID to use for Authentication to Azure Blob Storage logging
| AZURE_STORAGE_CLIENT_ID | The Application Client ID to use for Authentication to Azure Blob Storage logging
| AZURE_STORAGE_CLIENT_SECRET | The Application Client Secret to use for Authentication to Azure Blob Storage logging
| AZURE_TENANT_ID | Tenant ID for Azure Active Directory
| BERRISPEND_ACCOUNT_ID | Account ID for BerriSpend service
| BRAINTRUST_API_KEY | API key for Braintrust integration
| CIRCLE_OIDC_TOKEN | OpenID Connect token for CircleCI

@@ -433,6 +436,7 @@ router_settings:
| LITERAL_BATCH_SIZE | Batch size for Literal operations
| LITELLM_DONT_SHOW_FEEDBACK_BOX | Flag to hide feedback box in LiteLLM UI
| LITELLM_DROP_PARAMS | Parameters to drop in LiteLLM requests
| LITELLM_MODIFY_PARAMS | Parameters to modify in LiteLLM requests
| LITELLM_EMAIL | Email associated with LiteLLM account
| LITELLM_GLOBAL_MAX_PARALLEL_REQUEST_RETRIES | Maximum retries for parallel requests in LiteLLM
| LITELLM_GLOBAL_MAX_PARALLEL_REQUEST_RETRY_TIMEOUT | Timeout for retries of parallel requests in LiteLLM

@@ -446,6 +450,8 @@ router_settings:
| LITELLM_TOKEN | Access token for LiteLLM integration
| LITELLM_PRINT_STANDARD_LOGGING_PAYLOAD | If true, prints the standard logging payload to the console - useful for debugging
| LOGFIRE_TOKEN | Token for Logfire logging service
| MISTRAL_API_BASE | Base URL for Mistral API
| MISTRAL_API_KEY | API key for Mistral API
| MICROSOFT_CLIENT_ID | Client ID for Microsoft services
| MICROSOFT_CLIENT_SECRET | Client secret for Microsoft services
| MICROSOFT_TENANT | Tenant ID for Microsoft Azure
@@ -188,7 +188,7 @@ const sidebars = {
"providers/azure_ai",
"providers/aiml",
"providers/vertex",
{
type: "category",
label: "Google AI Studio",

@@ -340,7 +340,7 @@ const sidebars = {
},
"rerank",
"assistants",
{
type: "category",
label: "/files",

@@ -407,9 +407,9 @@ const sidebars = {
type: "category",
label: "Logging & Observability",
items: [
"observability/langfuse_integration",
"observability/lunary_integration",
"observability/mlflow",
"observability/langfuse_integration",
"observability/gcs_bucket_integration",
"observability/langsmith_integration",
"observability/literalai_integration",
@@ -113,6 +113,7 @@ _custom_logger_compatible_callbacks_literal = Literal[
"pagerduty",
"humanloop",
"gcs_pubsub",
"anthropic_cache_control_hook",
]
logged_real_time_event_types: Optional[Union[List[str], Literal["*"]]] = None
_known_custom_logger_compatible_callbacks: List = list(

@@ -127,19 +128,19 @@ prometheus_initialize_budget_metrics: Optional[bool] = False
require_auth_for_metrics_endpoint: Optional[bool] = False
argilla_batch_size: Optional[int] = None
datadog_use_v1: Optional[bool] = False # if you want to use v1 datadog logged payload
gcs_pub_sub_use_v1: Optional[bool] = (
False # if you want to use v1 gcs pubsub logged payload
)
gcs_pub_sub_use_v1: Optional[
bool
] = False # if you want to use v1 gcs pubsub logged payload
argilla_transformation_object: Optional[Dict[str, Any]] = None
_async_input_callback: List[Union[str, Callable, CustomLogger]] = (
[]
) # internal variable - async custom callbacks are routed here.
_async_success_callback: List[Union[str, Callable, CustomLogger]] = (
[]
) # internal variable - async custom callbacks are routed here.
_async_failure_callback: List[Union[str, Callable, CustomLogger]] = (
[]
) # internal variable - async custom callbacks are routed here.
_async_input_callback: List[
Union[str, Callable, CustomLogger]
] = [] # internal variable - async custom callbacks are routed here.
_async_success_callback: List[
Union[str, Callable, CustomLogger]
] = [] # internal variable - async custom callbacks are routed here.
_async_failure_callback: List[
Union[str, Callable, CustomLogger]
] = [] # internal variable - async custom callbacks are routed here.
pre_call_rules: List[Callable] = []
post_call_rules: List[Callable] = []
turn_off_message_logging: Optional[bool] = False

@@ -147,22 +148,22 @@ log_raw_request_response: bool = False
redact_messages_in_exceptions: Optional[bool] = False
redact_user_api_key_info: Optional[bool] = False
filter_invalid_headers: Optional[bool] = False
add_user_information_to_llm_headers: Optional[bool] = (
None # adds user_id, team_id, token hash (params from StandardLoggingMetadata) to request headers
)
add_user_information_to_llm_headers: Optional[
bool
] = None # adds user_id, team_id, token hash (params from StandardLoggingMetadata) to request headers
store_audit_logs = False # Enterprise feature, allow users to see audit logs
### end of callbacks #############

email: Optional[str] = (
None # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
)
token: Optional[str] = (
None # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
)
email: Optional[
str
] = None # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
token: Optional[
str
] = None # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
telemetry = True
max_tokens: int = DEFAULT_MAX_TOKENS # OpenAI Defaults
drop_params = bool(os.getenv("LITELLM_DROP_PARAMS", False))
modify_params = False
modify_params = bool(os.getenv("LITELLM_MODIFY_PARAMS", False))
retry = True
### AUTH ###
api_key: Optional[str] = None

@@ -234,24 +235,20 @@ enable_loadbalancing_on_batch_endpoints: Optional[bool] = None
enable_caching_on_provider_specific_optional_params: bool = (
False # feature-flag for caching on optional params - e.g. 'top_k'
)
caching: bool = (
False # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
)
caching_with_models: bool = (
False # # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
)
cache: Optional[Cache] = (
None # cache object <- use this - https://docs.litellm.ai/docs/caching
)
caching: bool = False # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
caching_with_models: bool = False # # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
cache: Optional[
Cache
] = None # cache object <- use this - https://docs.litellm.ai/docs/caching
default_in_memory_ttl: Optional[float] = None
default_redis_ttl: Optional[float] = None
default_redis_batch_cache_expiry: Optional[float] = None
model_alias_map: Dict[str, str] = {}
model_group_alias_map: Dict[str, str] = {}
max_budget: float = 0.0 # set the max budget across all providers
budget_duration: Optional[str] = (
None # proxy only - resets budget after fixed duration. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d").
)
budget_duration: Optional[
str
] = None # proxy only - resets budget after fixed duration. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d").
default_soft_budget: float = (
DEFAULT_SOFT_BUDGET # by default all litellm proxy keys have a soft budget of 50.0
)

@@ -260,15 +257,11 @@ forward_traceparent_to_llm_provider: bool = False

_current_cost = 0.0 # private variable, used if max budget is set
error_logs: Dict = {}
add_function_to_prompt: bool = (
False # if function calling not supported by api, append function call details to system prompt
)
add_function_to_prompt: bool = False # if function calling not supported by api, append function call details to system prompt
client_session: Optional[httpx.Client] = None
aclient_session: Optional[httpx.AsyncClient] = None
model_fallbacks: Optional[List] = None # Deprecated for 'litellm.fallbacks'
model_cost_map_url: str = (
"https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"
)
model_cost_map_url: str = "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"
suppress_debug_info = False
dynamodb_table_name: Optional[str] = None
s3_callback_params: Optional[Dict] = None

@@ -291,9 +284,7 @@ disable_end_user_cost_tracking_prometheus_only: Optional[bool] = None
custom_prometheus_metadata_labels: List[str] = []
#### REQUEST PRIORITIZATION ####
priority_reservation: Optional[Dict[str, float]] = None
force_ipv4: bool = (
False # when True, litellm will force ipv4 for all LLM requests. Some users have seen httpx ConnectionError when using ipv6.
)
force_ipv4: bool = False # when True, litellm will force ipv4 for all LLM requests. Some users have seen httpx ConnectionError when using ipv6.
module_level_aclient = AsyncHTTPHandler(
timeout=request_timeout, client_alias="module level aclient"
)

@@ -307,13 +298,13 @@ fallbacks: Optional[List] = None
context_window_fallbacks: Optional[List] = None
content_policy_fallbacks: Optional[List] = None
allowed_fails: int = 3
num_retries_per_request: Optional[int] = (
None # for the request overall (incl. fallbacks + model retries)
)
num_retries_per_request: Optional[
int
] = None # for the request overall (incl. fallbacks + model retries)
####### SECRET MANAGERS #####################
secret_manager_client: Optional[Any] = (
None # list of instantiated key management clients - e.g. azure kv, infisical, etc.
)
secret_manager_client: Optional[
Any
] = None # list of instantiated key management clients - e.g. azure kv, infisical, etc.
_google_kms_resource_name: Optional[str] = None
_key_management_system: Optional[KeyManagementSystem] = None
_key_management_settings: KeyManagementSettings = KeyManagementSettings()

@@ -324,6 +315,7 @@ from litellm.litellm_core_utils.get_model_cost_map import get_model_cost_map

model_cost = get_model_cost_map(url=model_cost_map_url)
custom_prompt_dict: Dict[str, dict] = {}
check_provider_endpoint = False

####### THREAD-SPECIFIC DATA ####################

@@ -1063,10 +1055,10 @@ from .types.llms.custom_llm import CustomLLMItem
from .types.utils import GenericStreamingChunk

custom_provider_map: List[CustomLLMItem] = []
_custom_providers: List[str] = (
[]
) # internal helper util, used to track names of custom providers
disable_hf_tokenizer_download: Optional[bool] = (
None # disable huggingface tokenizer download. Defaults to openai clk100
)
_custom_providers: List[
str
] = [] # internal helper util, used to track names of custom providers
disable_hf_tokenizer_download: Optional[
bool
] = None # disable huggingface tokenizer download. Defaults to openai clk100
global_disable_no_log_param: bool = False
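A small, hedged illustration of the env-driven defaults referenced above (the variable names come from the diff; the values shown are assumptions, and the flags are read once at import time):

import os

# Any non-empty string makes bool(os.getenv(...)) truthy, so set these before importing litellm.
os.environ["LITELLM_DROP_PARAMS"] = "True"
os.environ["LITELLM_MODIFY_PARAMS"] = "True"

import litellm

print(litellm.drop_params, litellm.modify_params)  # True True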
litellm/integrations/anthropic_cache_control_hook.py (new file, 150 lines)

@@ -0,0 +1,150 @@
"""
This hook is used to inject cache control directives into the messages of a chat completion.

Users can define
- `cache_control_injection_points` in the completion params and litellm will inject the cache control directives into the messages at the specified injection points.

"""

import copy
from typing import Dict, List, Optional, Tuple, Union, cast

from litellm.integrations.custom_logger import CustomLogger
from litellm.integrations.custom_prompt_management import CustomPromptManagement
from litellm.types.integrations.anthropic_cache_control_hook import (
    CacheControlInjectionPoint,
    CacheControlMessageInjectionPoint,
)
from litellm.types.llms.openai import AllMessageValues, ChatCompletionCachedContent
from litellm.types.utils import StandardCallbackDynamicParams


class AnthropicCacheControlHook(CustomPromptManagement):
    def get_chat_completion_prompt(
        self,
        model: str,
        messages: List[AllMessageValues],
        non_default_params: dict,
        prompt_id: Optional[str],
        prompt_variables: Optional[dict],
        dynamic_callback_params: StandardCallbackDynamicParams,
    ) -> Tuple[str, List[AllMessageValues], dict]:
        """
        Apply cache control directives based on specified injection points.

        Returns:
        - model: str - the model to use
        - messages: List[AllMessageValues] - messages with applied cache controls
        - non_default_params: dict - params with any global cache controls
        """
        # Extract cache control injection points
        injection_points: List[CacheControlInjectionPoint] = non_default_params.pop(
            "cache_control_injection_points", []
        )
        if not injection_points:
            return model, messages, non_default_params

        # Create a deep copy of messages to avoid modifying the original list
        processed_messages = copy.deepcopy(messages)

        # Process message-level cache controls
        for point in injection_points:
            if point.get("location") == "message":
                point = cast(CacheControlMessageInjectionPoint, point)
                processed_messages = self._process_message_injection(
                    point=point, messages=processed_messages
                )

        return model, processed_messages, non_default_params

    @staticmethod
    def _process_message_injection(
        point: CacheControlMessageInjectionPoint, messages: List[AllMessageValues]
    ) -> List[AllMessageValues]:
        """Process message-level cache control injection."""
        control: ChatCompletionCachedContent = point.get(
            "control", None
        ) or ChatCompletionCachedContent(type="ephemeral")

        _targetted_index: Optional[Union[int, str]] = point.get("index", None)
        targetted_index: Optional[int] = None
        if isinstance(_targetted_index, str):
            if _targetted_index.isdigit():
                targetted_index = int(_targetted_index)
        else:
            targetted_index = _targetted_index

        targetted_role = point.get("role", None)

        # Case 1: Target by specific index
        if targetted_index is not None:
            if 0 <= targetted_index < len(messages):
                messages[targetted_index] = (
                    AnthropicCacheControlHook._safe_insert_cache_control_in_message(
                        messages[targetted_index], control
                    )
                )
        # Case 2: Target by role
        elif targetted_role is not None:
            for msg in messages:
                if msg.get("role") == targetted_role:
                    msg = (
                        AnthropicCacheControlHook._safe_insert_cache_control_in_message(
                            message=msg, control=control
                        )
                    )
        return messages

    @staticmethod
    def _safe_insert_cache_control_in_message(
        message: AllMessageValues, control: ChatCompletionCachedContent
    ) -> AllMessageValues:
        """
        Safe way to insert cache control in a message

        OpenAI Message content can be either:
        - string
        - list of objects

        This method handles inserting cache control in both cases.
        """
        message_content = message.get("content", None)

        # 1. if string, insert cache control in the message
        if isinstance(message_content, str):
            message["cache_control"] = control  # type: ignore
        # 2. list of objects
        elif isinstance(message_content, list):
            for content_item in message_content:
                if isinstance(content_item, dict):
                    content_item["cache_control"] = control  # type: ignore
        return message

    @property
    def integration_name(self) -> str:
        """Return the integration name for this hook."""
        return "anthropic_cache_control_hook"

    @staticmethod
    def should_use_anthropic_cache_control_hook(non_default_params: Dict) -> bool:
        if non_default_params.get("cache_control_injection_points", None):
            return True
        return False

    @staticmethod
    def get_custom_logger_for_anthropic_cache_control_hook(
        non_default_params: Dict,
    ) -> Optional[CustomLogger]:
        from litellm.litellm_core_utils.litellm_logging import (
            _init_custom_logger_compatible_class,
        )

        if AnthropicCacheControlHook.should_use_anthropic_cache_control_hook(
            non_default_params
        ):
            return _init_custom_logger_compatible_class(
                logging_integration="anthropic_cache_control_hook",
                internal_usage_cache=None,
                llm_router=None,
            )
        return None
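A minimal usage sketch of the new hook (illustrative only: the model name is an assumption, but the `cache_control_injection_points` shape matches the keys the hook reads - location, role, index, control):

import litellm

# Ask litellm to mark every system message as cacheable; when no "control" is
# given, the hook defaults to {"type": "ephemeral"}.
response = litellm.completion(
    model="anthropic/claude-3-5-sonnet-20240620",  # assumed model name
    messages=[
        {"role": "system", "content": "Very long, reusable system prompt..."},
        {"role": "user", "content": "Hello"},
    ],
    cache_control_injection_points=[
        {"location": "message", "role": "system"},
    ],
)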
@@ -94,7 +94,7 @@ class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callbac
model: str,
messages: List[AllMessageValues],
non_default_params: dict,
prompt_id: str,
prompt_id: Optional[str],
prompt_variables: Optional[dict],
dynamic_callback_params: StandardCallbackDynamicParams,
) -> Tuple[str, List[AllMessageValues], dict]:
@@ -15,7 +15,7 @@ class CustomPromptManagement(CustomLogger, PromptManagementBase):
model: str,
messages: List[AllMessageValues],
non_default_params: dict,
prompt_id: str,
prompt_id: Optional[str],
prompt_variables: Optional[dict],
dynamic_callback_params: StandardCallbackDynamicParams,
) -> Tuple[str, List[AllMessageValues], dict]:
@@ -152,14 +152,21 @@ class HumanloopLogger(CustomLogger):
model: str,
messages: List[AllMessageValues],
non_default_params: dict,
prompt_id: str,
prompt_id: Optional[str],
prompt_variables: Optional[dict],
dynamic_callback_params: StandardCallbackDynamicParams,
) -> Tuple[str, List[AllMessageValues], dict,]:
) -> Tuple[
str,
List[AllMessageValues],
dict,
]:
humanloop_api_key = dynamic_callback_params.get(
"humanloop_api_key"
) or get_secret_str("HUMANLOOP_API_KEY")

if prompt_id is None:
raise ValueError("prompt_id is required for Humanloop integration")

if humanloop_api_key is None:
return super().get_chat_completion_prompt(
model=model,
@@ -169,10 +169,14 @@ class LangfusePromptManagement(LangFuseLogger, PromptManagementBase, CustomLogge
model: str,
messages: List[AllMessageValues],
non_default_params: dict,
prompt_id: str,
prompt_id: Optional[str],
prompt_variables: Optional[dict],
dynamic_callback_params: StandardCallbackDynamicParams,
) -> Tuple[str, List[AllMessageValues], dict,]:
) -> Tuple[
str,
List[AllMessageValues],
dict,
]:
return self.get_chat_completion_prompt(
model,
messages,
@@ -79,10 +79,12 @@ class PromptManagementBase(ABC):
model: str,
messages: List[AllMessageValues],
non_default_params: dict,
prompt_id: str,
prompt_id: Optional[str],
prompt_variables: Optional[dict],
dynamic_callback_params: StandardCallbackDynamicParams,
) -> Tuple[str, List[AllMessageValues], dict,]:
) -> Tuple[str, List[AllMessageValues], dict]:
if prompt_id is None:
raise ValueError("prompt_id is required for Prompt Management Base class")
if not self.should_run_prompt_management(
prompt_id=prompt_id, dynamic_callback_params=dynamic_callback_params
):
@@ -36,6 +36,7 @@ from litellm.cost_calculator import (
RealtimeAPITokenUsageProcessor,
_select_model_name_for_cost_calc,
)
from litellm.integrations.anthropic_cache_control_hook import AnthropicCacheControlHook
from litellm.integrations.arize.arize import ArizeLogger
from litellm.integrations.custom_guardrail import CustomGuardrail
from litellm.integrations.custom_logger import CustomLogger

@@ -249,9 +250,9 @@ class Logging(LiteLLMLoggingBaseClass):
self.litellm_trace_id = litellm_trace_id
self.function_id = function_id
self.streaming_chunks: List[Any] = [] # for generating complete stream response
self.sync_streaming_chunks: List[
Any
] = [] # for generating complete stream response
self.sync_streaming_chunks: List[Any] = (
[]
) # for generating complete stream response
self.log_raw_request_response = log_raw_request_response

# Initialize dynamic callbacks

@@ -455,19 +456,38 @@ class Logging(LiteLLMLoggingBaseClass):
if "custom_llm_provider" in self.model_call_details:
self.custom_llm_provider = self.model_call_details["custom_llm_provider"]

def should_run_prompt_management_hooks(
self,
non_default_params: Dict,
prompt_id: Optional[str] = None,
) -> bool:
"""
Return True if prompt management hooks should be run
"""
if prompt_id:
return True
if AnthropicCacheControlHook.should_use_anthropic_cache_control_hook(
non_default_params
):
return True
return False

def get_chat_completion_prompt(
self,
model: str,
messages: List[AllMessageValues],
non_default_params: dict,
prompt_id: str,
non_default_params: Dict,
prompt_id: Optional[str],
prompt_variables: Optional[dict],
prompt_management_logger: Optional[CustomLogger] = None,
) -> Tuple[str, List[AllMessageValues], dict]:
custom_logger = (
prompt_management_logger
or self.get_custom_logger_for_prompt_management(model)
or self.get_custom_logger_for_prompt_management(
model=model, non_default_params=non_default_params
)
)

if custom_logger:
(
model,

@@ -476,7 +496,7 @@ class Logging(LiteLLMLoggingBaseClass):
) = custom_logger.get_chat_completion_prompt(
model=model,
messages=messages,
non_default_params=non_default_params,
non_default_params=non_default_params or {},
prompt_id=prompt_id,
prompt_variables=prompt_variables,
dynamic_callback_params=self.standard_callback_dynamic_params,

@@ -485,7 +505,7 @@ class Logging(LiteLLMLoggingBaseClass):
return model, messages, non_default_params

def get_custom_logger_for_prompt_management(
self, model: str
self, model: str, non_default_params: Dict
) -> Optional[CustomLogger]:
"""
Get a custom logger for prompt management based on model name or available callbacks.

@@ -520,6 +540,26 @@ class Logging(LiteLLMLoggingBaseClass):
self.model_call_details["prompt_integration"] = logger.__class__.__name__
return logger

if anthropic_cache_control_logger := AnthropicCacheControlHook.get_custom_logger_for_anthropic_cache_control_hook(
non_default_params
):
self.model_call_details["prompt_integration"] = (
anthropic_cache_control_logger.__class__.__name__
)
return anthropic_cache_control_logger

return None

def get_custom_logger_for_anthropic_cache_control_hook(
self, non_default_params: Dict
) -> Optional[CustomLogger]:
if non_default_params.get("cache_control_injection_points", None):
custom_logger = _init_custom_logger_compatible_class(
logging_integration="anthropic_cache_control_hook",
internal_usage_cache=None,
llm_router=None,
)
return custom_logger
return None

def _get_raw_request_body(self, data: Optional[Union[dict, str]]) -> dict:
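Illustrative behaviour of the new helper (the `logging_obj` instance and the parameter values are hypothetical):

# Runs prompt-management hooks because a prompt_id is present:
logging_obj.should_run_prompt_management_hooks(non_default_params={}, prompt_id="my-prompt")  # True

# Runs them because cache-control injection points are present:
logging_obj.should_run_prompt_management_hooks(
    non_default_params={"cache_control_injection_points": [{"location": "message", "role": "system"}]},
)  # True

# Nothing to do:
logging_obj.should_run_prompt_management_hooks(non_default_params={}, prompt_id=None)  # False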
@@ -557,9 +597,9 @@ class Logging(LiteLLMLoggingBaseClass):
model
): # if model name was changes pre-call, overwrite the initial model call name with the new one
self.model_call_details["model"] = model
self.model_call_details["litellm_params"][
"api_base"
] = self._get_masked_api_base(additional_args.get("api_base", ""))
self.model_call_details["litellm_params"]["api_base"] = (
self._get_masked_api_base(additional_args.get("api_base", ""))
)

def pre_call(self, input, api_key, model=None, additional_args={}): # noqa: PLR0915
# Log the exact input to the LLM API

@@ -588,10 +628,10 @@ class Logging(LiteLLMLoggingBaseClass):
try:
# [Non-blocking Extra Debug Information in metadata]
if turn_off_message_logging is True:
_metadata[
"raw_request"
] = "redacted by litellm. \
_metadata["raw_request"] = (
"redacted by litellm. \
'litellm.turn_off_message_logging=True'"
)
else:
curl_command = self._get_request_curl_command(
api_base=additional_args.get("api_base", ""),

@@ -602,32 +642,32 @@ class Logging(LiteLLMLoggingBaseClass):

_metadata["raw_request"] = str(curl_command)
# split up, so it's easier to parse in the UI
self.model_call_details[
"raw_request_typed_dict"
] = RawRequestTypedDict(
raw_request_api_base=str(
additional_args.get("api_base") or ""
),
raw_request_body=self._get_raw_request_body(
additional_args.get("complete_input_dict", {})
),
raw_request_headers=self._get_masked_headers(
additional_args.get("headers", {}) or {},
ignore_sensitive_headers=True,
),
error=None,
self.model_call_details["raw_request_typed_dict"] = (
RawRequestTypedDict(
raw_request_api_base=str(
additional_args.get("api_base") or ""
),
raw_request_body=self._get_raw_request_body(
additional_args.get("complete_input_dict", {})
),
raw_request_headers=self._get_masked_headers(
additional_args.get("headers", {}) or {},
ignore_sensitive_headers=True,
),
error=None,
)
)
except Exception as e:
self.model_call_details[
"raw_request_typed_dict"
] = RawRequestTypedDict(
error=str(e),
self.model_call_details["raw_request_typed_dict"] = (
RawRequestTypedDict(
error=str(e),
)
)
_metadata[
"raw_request"
] = "Unable to Log \
_metadata["raw_request"] = (
"Unable to Log \
raw request: {}".format(
str(e)
str(e)
)
)
if self.logger_fn and callable(self.logger_fn):
try:

@@ -957,9 +997,9 @@ class Logging(LiteLLMLoggingBaseClass):
verbose_logger.debug(
f"response_cost_failure_debug_information: {debug_info}"
)
self.model_call_details[
"response_cost_failure_debug_information"
] = debug_info
self.model_call_details["response_cost_failure_debug_information"] = (
debug_info
)
return None

try:

@@ -984,9 +1024,9 @@ class Logging(LiteLLMLoggingBaseClass):
verbose_logger.debug(
f"response_cost_failure_debug_information: {debug_info}"
)
self.model_call_details[
"response_cost_failure_debug_information"
] = debug_info
self.model_call_details["response_cost_failure_debug_information"] = (
debug_info
)

return None

@@ -1046,9 +1086,9 @@ class Logging(LiteLLMLoggingBaseClass):
end_time = datetime.datetime.now()
if self.completion_start_time is None:
self.completion_start_time = end_time
self.model_call_details[
"completion_start_time"
] = self.completion_start_time
self.model_call_details["completion_start_time"] = (
self.completion_start_time
)
self.model_call_details["log_event_type"] = "successful_api_call"
self.model_call_details["end_time"] = end_time
self.model_call_details["cache_hit"] = cache_hit

@@ -1127,39 +1167,39 @@ class Logging(LiteLLMLoggingBaseClass):
"response_cost"
]
else:
self.model_call_details[
"response_cost"
] = self._response_cost_calculator(result=logging_result)
self.model_call_details["response_cost"] = (
self._response_cost_calculator(result=logging_result)
)
## STANDARDIZED LOGGING PAYLOAD

self.model_call_details[
"standard_logging_object"
] = get_standard_logging_object_payload(
kwargs=self.model_call_details,
init_response_obj=logging_result,
start_time=start_time,
end_time=end_time,
logging_obj=self,
status="success",
standard_built_in_tools_params=self.standard_built_in_tools_params,
self.model_call_details["standard_logging_object"] = (
get_standard_logging_object_payload(
kwargs=self.model_call_details,
init_response_obj=logging_result,
start_time=start_time,
end_time=end_time,
logging_obj=self,
status="success",
standard_built_in_tools_params=self.standard_built_in_tools_params,
)
)
elif isinstance(result, dict) or isinstance(result, list):
## STANDARDIZED LOGGING PAYLOAD
self.model_call_details[
"standard_logging_object"
] = get_standard_logging_object_payload(
kwargs=self.model_call_details,
init_response_obj=result,
start_time=start_time,
end_time=end_time,
logging_obj=self,
status="success",
standard_built_in_tools_params=self.standard_built_in_tools_params,
self.model_call_details["standard_logging_object"] = (
get_standard_logging_object_payload(
kwargs=self.model_call_details,
init_response_obj=result,
start_time=start_time,
end_time=end_time,
logging_obj=self,
status="success",
standard_built_in_tools_params=self.standard_built_in_tools_params,
)
)
elif standard_logging_object is not None:
self.model_call_details[
"standard_logging_object"
] = standard_logging_object
self.model_call_details["standard_logging_object"] = (
standard_logging_object
)
else: # streaming chunks + image gen.
self.model_call_details["response_cost"] = None

@@ -1215,23 +1255,23 @@ class Logging(LiteLLMLoggingBaseClass):
verbose_logger.debug(
"Logging Details LiteLLM-Success Call streaming complete"
)
self.model_call_details[
"complete_streaming_response"
] = complete_streaming_response
self.model_call_details[
"response_cost"
] = self._response_cost_calculator(result=complete_streaming_response)
self.model_call_details["complete_streaming_response"] = (
complete_streaming_response
)
self.model_call_details["response_cost"] = (
self._response_cost_calculator(result=complete_streaming_response)
)
## STANDARDIZED LOGGING PAYLOAD
self.model_call_details[
"standard_logging_object"
] = get_standard_logging_object_payload(
kwargs=self.model_call_details,
init_response_obj=complete_streaming_response,
start_time=start_time,
end_time=end_time,
logging_obj=self,
status="success",
standard_built_in_tools_params=self.standard_built_in_tools_params,
self.model_call_details["standard_logging_object"] = (
get_standard_logging_object_payload(
kwargs=self.model_call_details,
init_response_obj=complete_streaming_response,
start_time=start_time,
end_time=end_time,
logging_obj=self,
status="success",
standard_built_in_tools_params=self.standard_built_in_tools_params,
)
)
callbacks = self.get_combined_callback_list(
dynamic_success_callbacks=self.dynamic_success_callbacks,

@@ -1580,10 +1620,10 @@ class Logging(LiteLLMLoggingBaseClass):
)
else:
if self.stream and complete_streaming_response:
self.model_call_details[
"complete_response"
] = self.model_call_details.get(
"complete_streaming_response", {}
self.model_call_details["complete_response"] = (
self.model_call_details.get(
"complete_streaming_response", {}
)
)
result = self.model_call_details["complete_response"]
openMeterLogger.log_success_event(

@@ -1623,10 +1663,10 @@ class Logging(LiteLLMLoggingBaseClass):
)
else:
if self.stream and complete_streaming_response:
self.model_call_details[
"complete_response"
] = self.model_call_details.get(
"complete_streaming_response", {}
self.model_call_details["complete_response"] = (
self.model_call_details.get(
"complete_streaming_response", {}
)
)
result = self.model_call_details["complete_response"]

@@ -1733,9 +1773,9 @@ class Logging(LiteLLMLoggingBaseClass):
if complete_streaming_response is not None:
print_verbose("Async success callbacks: Got a complete streaming response")

self.model_call_details[
"async_complete_streaming_response"
] = complete_streaming_response
self.model_call_details["async_complete_streaming_response"] = (
complete_streaming_response
)
try:
if self.model_call_details.get("cache_hit", False) is True:
self.model_call_details["response_cost"] = 0.0

@@ -1745,10 +1785,10 @@ class Logging(LiteLLMLoggingBaseClass):
model_call_details=self.model_call_details
)
# base_model defaults to None if not set on model_info
self.model_call_details[
"response_cost"
] = self._response_cost_calculator(
result=complete_streaming_response
self.model_call_details["response_cost"] = (
self._response_cost_calculator(
result=complete_streaming_response
)
)

verbose_logger.debug(

@@ -1761,16 +1801,16 @@ class Logging(LiteLLMLoggingBaseClass):
self.model_call_details["response_cost"] = None

## STANDARDIZED LOGGING PAYLOAD
self.model_call_details[
"standard_logging_object"
] = get_standard_logging_object_payload(
kwargs=self.model_call_details,
init_response_obj=complete_streaming_response,
start_time=start_time,
end_time=end_time,
logging_obj=self,
status="success",
standard_built_in_tools_params=self.standard_built_in_tools_params,
self.model_call_details["standard_logging_object"] = (
get_standard_logging_object_payload(
kwargs=self.model_call_details,
init_response_obj=complete_streaming_response,
start_time=start_time,
end_time=end_time,
logging_obj=self,
status="success",
standard_built_in_tools_params=self.standard_built_in_tools_params,
)
)
callbacks = self.get_combined_callback_list(
dynamic_success_callbacks=self.dynamic_async_success_callbacks,

@@ -1976,18 +2016,18 @@ class Logging(LiteLLMLoggingBaseClass):

## STANDARDIZED LOGGING PAYLOAD

self.model_call_details[
"standard_logging_object"
] = get_standard_logging_object_payload(
kwargs=self.model_call_details,
init_response_obj={},
start_time=start_time,
end_time=end_time,
logging_obj=self,
status="failure",
error_str=str(exception),
original_exception=exception,
standard_built_in_tools_params=self.standard_built_in_tools_params,
self.model_call_details["standard_logging_object"] = (
get_standard_logging_object_payload(
kwargs=self.model_call_details,
init_response_obj={},
start_time=start_time,
end_time=end_time,
logging_obj=self,
status="failure",
error_str=str(exception),
original_exception=exception,
standard_built_in_tools_params=self.standard_built_in_tools_params,
)
)
return start_time, end_time

@@ -2753,9 +2793,9 @@ def _init_custom_logger_compatible_class( # noqa: PLR0915
endpoint=arize_config.endpoint,
)

os.environ[
"OTEL_EXPORTER_OTLP_TRACES_HEADERS"
] = f"space_key={arize_config.space_key},api_key={arize_config.api_key}"
os.environ["OTEL_EXPORTER_OTLP_TRACES_HEADERS"] = (
f"space_key={arize_config.space_key},api_key={arize_config.api_key}"
)
for callback in _in_memory_loggers:
if (
isinstance(callback, ArizeLogger)

@@ -2779,9 +2819,9 @@ def _init_custom_logger_compatible_class( # noqa: PLR0915

# auth can be disabled on local deployments of arize phoenix
if arize_phoenix_config.otlp_auth_headers is not None:
os.environ[
"OTEL_EXPORTER_OTLP_TRACES_HEADERS"
] = arize_phoenix_config.otlp_auth_headers
os.environ["OTEL_EXPORTER_OTLP_TRACES_HEADERS"] = (
arize_phoenix_config.otlp_auth_headers
)

for callback in _in_memory_loggers:
if (

@@ -2872,9 +2912,9 @@ def _init_custom_logger_compatible_class( # noqa: PLR0915
exporter="otlp_http",
endpoint="https://langtrace.ai/api/trace",
)
os.environ[
"OTEL_EXPORTER_OTLP_TRACES_HEADERS"
] = f"api_key={os.getenv('LANGTRACE_API_KEY')}"
os.environ["OTEL_EXPORTER_OTLP_TRACES_HEADERS"] = (
f"api_key={os.getenv('LANGTRACE_API_KEY')}"
)
for callback in _in_memory_loggers:
if (
isinstance(callback, OpenTelemetry)

@@ -2908,6 +2948,13 @@ def _init_custom_logger_compatible_class( # noqa: PLR0915
pagerduty_logger = PagerDutyAlerting(**custom_logger_init_args)
_in_memory_loggers.append(pagerduty_logger)
return pagerduty_logger # type: ignore
elif logging_integration == "anthropic_cache_control_hook":
for callback in _in_memory_loggers:
if isinstance(callback, AnthropicCacheControlHook):
return callback
anthropic_cache_control_hook = AnthropicCacheControlHook()
_in_memory_loggers.append(anthropic_cache_control_hook)
return anthropic_cache_control_hook # type: ignore
elif logging_integration == "gcs_pubsub":
for callback in _in_memory_loggers:
if isinstance(callback, GcsPubSubLogger):

@@ -3046,6 +3093,10 @@ def get_custom_logger_compatible_class( # noqa: PLR0915
for callback in _in_memory_loggers:
if isinstance(callback, PagerDutyAlerting):
return callback
elif logging_integration == "anthropic_cache_control_hook":
for callback in _in_memory_loggers:
if isinstance(callback, AnthropicCacheControlHook):
return callback
elif logging_integration == "gcs_pubsub":
for callback in _in_memory_loggers:
if isinstance(callback, GcsPubSubLogger):

@@ -3369,10 +3420,10 @@ class StandardLoggingPayloadSetup:
for key in StandardLoggingHiddenParams.__annotations__.keys():
if key in hidden_params:
if key == "additional_headers":
clean_hidden_params[
"additional_headers"
] = StandardLoggingPayloadSetup.get_additional_headers(
hidden_params[key]
clean_hidden_params["additional_headers"] = (
StandardLoggingPayloadSetup.get_additional_headers(
hidden_params[key]
)
)
else:
clean_hidden_params[key] = hidden_params[key] # type: ignore

@@ -3651,7 +3702,7 @@ def emit_standard_logging_payload(payload: StandardLoggingPayload):

def get_standard_logging_metadata(
metadata: Optional[Dict[str, Any]]
metadata: Optional[Dict[str, Any]],
) -> StandardLoggingMetadata:
"""
Clean and filter the metadata dictionary to include only the specified keys in StandardLoggingMetadata.

@@ -3715,9 +3766,9 @@ def scrub_sensitive_keys_in_metadata(litellm_params: Optional[dict]):
):
for k, v in metadata["user_api_key_metadata"].items():
if k == "logging": # prevent logging user logging keys
cleaned_user_api_key_metadata[
k
] = "scrubbed_by_litellm_for_sensitive_keys"
cleaned_user_api_key_metadata[k] = (
"scrubbed_by_litellm_for_sensitive_keys"
)
else:
cleaned_user_api_key_metadata[k] = v
@@ -44,7 +44,7 @@ from litellm.utils import (
token_counter,
)

from ..common_utils import AnthropicError, process_anthropic_headers
from ..common_utils import AnthropicError, AnthropicModelInfo, process_anthropic_headers

if TYPE_CHECKING:
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj

@@ -54,7 +54,7 @@ else:
LoggingClass = Any


class AnthropicConfig(BaseConfig):
class AnthropicConfig(AnthropicModelInfo, BaseConfig):
"""
Reference: https://docs.anthropic.com/claude/reference/messages_post

@@ -127,41 +127,6 @@ class AnthropicConfig(BaseConfig):
"anthropic-beta": "prompt-caching-2024-07-31",
}

def get_anthropic_headers(
self,
api_key: str,
anthropic_version: Optional[str] = None,
computer_tool_used: bool = False,
prompt_caching_set: bool = False,
pdf_used: bool = False,
is_vertex_request: bool = False,
user_anthropic_beta_headers: Optional[List[str]] = None,
) -> dict:
betas = set()
if prompt_caching_set:
betas.add("prompt-caching-2024-07-31")
if computer_tool_used:
betas.add("computer-use-2024-10-22")
if pdf_used:
betas.add("pdfs-2024-09-25")
headers = {
"anthropic-version": anthropic_version or "2023-06-01",
"x-api-key": api_key,
"accept": "application/json",
"content-type": "application/json",
}

if user_anthropic_beta_headers is not None:
betas.update(user_anthropic_beta_headers)

# Don't send any beta headers to Vertex, Vertex has failed requests when they are sent
if is_vertex_request is True:
pass
elif len(betas) > 0:
headers["anthropic-beta"] = ",".join(betas)

return headers

def _map_tool_choice(
self, tool_choice: Optional[str], parallel_tool_use: Optional[bool]
) -> Optional[AnthropicMessagesToolChoice]:

@@ -446,49 +411,6 @@ class AnthropicConfig(BaseConfig):
)
return _tool

def is_cache_control_set(self, messages: List[AllMessageValues]) -> bool:
"""
Return if {"cache_control": ..} in message content block

Used to check if anthropic prompt caching headers need to be set.
"""
for message in messages:
if message.get("cache_control", None) is not None:
return True
_message_content = message.get("content")
if _message_content is not None and isinstance(_message_content, list):
for content in _message_content:
if "cache_control" in content:
return True

return False

def is_computer_tool_used(
self, tools: Optional[List[AllAnthropicToolsValues]]
) -> bool:
if tools is None:
return False
for tool in tools:
if "type" in tool and tool["type"].startswith("computer_"):
return True
return False

def is_pdf_used(self, messages: List[AllMessageValues]) -> bool:
"""
Set to true if media passed into messages.

"""
for message in messages:
if (
"content" in message
and message["content"] is not None
and isinstance(message["content"], list)
):
for content in message["content"]:
if "type" in content and content["type"] != "text":
return True
return False

def translate_system_message(
self, messages: List[AllMessageValues]
) -> List[AnthropicSystemMessageContent]:

@@ -862,47 +784,3 @@ class AnthropicConfig(BaseConfig):
message=error_message,
headers=cast(httpx.Headers, headers),
)

def _get_user_anthropic_beta_headers(
self, anthropic_beta_header: Optional[str]
) -> Optional[List[str]]:
if anthropic_beta_header is None:
return None
return anthropic_beta_header.split(",")

def validate_environment(
self,
headers: dict,
model: str,
messages: List[AllMessageValues],
optional_params: dict,
litellm_params: dict,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
) -> Dict:
if api_key is None:
raise litellm.AuthenticationError(
message="Missing Anthropic API Key - A call is being made to anthropic but no key is set either in the environment variables or via params. Please set `ANTHROPIC_API_KEY` in your environment vars",
llm_provider="anthropic",
model=model,
)

tools = optional_params.get("tools")
prompt_caching_set = self.is_cache_control_set(messages=messages)
computer_tool_used = self.is_computer_tool_used(tools=tools)
pdf_used = self.is_pdf_used(messages=messages)
user_anthropic_beta_headers = self._get_user_anthropic_beta_headers(
anthropic_beta_header=headers.get("anthropic-beta")
)
anthropic_headers = self.get_anthropic_headers(
computer_tool_used=computer_tool_used,
prompt_caching_set=prompt_caching_set,
pdf_used=pdf_used,
api_key=api_key,
is_vertex_request=optional_params.get("is_vertex_request", False),
user_anthropic_beta_headers=user_anthropic_beta_headers,
)

headers = {**headers, **anthropic_headers}

return headers
@@ -2,7 +2,7 @@
This file contains common utils for anthropic calls.
"""

from typing import List, Optional, Union
from typing import Dict, List, Optional, Union

import httpx

@@ -10,6 +10,8 @@ import litellm
from litellm.llms.base_llm.base_utils import BaseLLMModelInfo
from litellm.llms.base_llm.chat.transformation import BaseLLMException
from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.anthropic import AllAnthropicToolsValues
from litellm.types.llms.openai import AllMessageValues


class AnthropicError(BaseLLMException):

@@ -23,6 +25,128 @@ class AnthropicError(BaseLLMException):


class AnthropicModelInfo(BaseLLMModelInfo):
def is_cache_control_set(self, messages: List[AllMessageValues]) -> bool:
"""
Return if {"cache_control": ..} in message content block

Used to check if anthropic prompt caching headers need to be set.
"""
for message in messages:
if message.get("cache_control", None) is not None:
return True
_message_content = message.get("content")
if _message_content is not None and isinstance(_message_content, list):
for content in _message_content:
if "cache_control" in content:
return True

return False

def is_computer_tool_used(
self, tools: Optional[List[AllAnthropicToolsValues]]
) -> bool:
if tools is None:
return False
for tool in tools:
if "type" in tool and tool["type"].startswith("computer_"):
return True
return False

def is_pdf_used(self, messages: List[AllMessageValues]) -> bool:
"""
Set to true if media passed into messages.

"""
for message in messages:
if (
"content" in message
and message["content"] is not None
and isinstance(message["content"], list)
):
for content in message["content"]:
if "type" in content and content["type"] != "text":
return True
return False

def _get_user_anthropic_beta_headers(
self, anthropic_beta_header: Optional[str]
) -> Optional[List[str]]:
if anthropic_beta_header is None:
return None
return anthropic_beta_header.split(",")

def get_anthropic_headers(
self,
api_key: str,
anthropic_version: Optional[str] = None,
computer_tool_used: bool = False,
prompt_caching_set: bool = False,
pdf_used: bool = False,
is_vertex_request: bool = False,
user_anthropic_beta_headers: Optional[List[str]] = None,
) -> dict:
betas = set()
if prompt_caching_set:
betas.add("prompt-caching-2024-07-31")
if computer_tool_used:
betas.add("computer-use-2024-10-22")
if pdf_used:
betas.add("pdfs-2024-09-25")
headers = {
"anthropic-version": anthropic_version or "2023-06-01",
"x-api-key": api_key,
"accept": "application/json",
"content-type": "application/json",
}

if user_anthropic_beta_headers is not None:
betas.update(user_anthropic_beta_headers)

# Don't send any beta headers to Vertex, Vertex has failed requests when they are sent
if is_vertex_request is True:
pass
elif len(betas) > 0:
headers["anthropic-beta"] = ",".join(betas)

return headers

def validate_environment(
self,
headers: dict,
model: str,
messages: List[AllMessageValues],
optional_params: dict,
litellm_params: dict,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
) -> Dict:
if api_key is None:
raise litellm.AuthenticationError(
message="Missing Anthropic API Key - A call is being made to anthropic but no key is set either in the environment variables or via params. Please set `ANTHROPIC_API_KEY` in your environment vars",
llm_provider="anthropic",
model=model,
)

tools = optional_params.get("tools")
prompt_caching_set = self.is_cache_control_set(messages=messages)
computer_tool_used = self.is_computer_tool_used(tools=tools)
pdf_used = self.is_pdf_used(messages=messages)
user_anthropic_beta_headers = self._get_user_anthropic_beta_headers(
anthropic_beta_header=headers.get("anthropic-beta")
)
anthropic_headers = self.get_anthropic_headers(
computer_tool_used=computer_tool_used,
prompt_caching_set=prompt_caching_set,
pdf_used=pdf_used,
api_key=api_key,
is_vertex_request=optional_params.get("is_vertex_request", False),
user_anthropic_beta_headers=user_anthropic_beta_headers,
)

headers = {**headers, **anthropic_headers}

return headers

@staticmethod
def get_api_base(api_base: Optional[str] = None) -> Optional[str]:
return (
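As an illustration of what `is_cache_control_set` and `get_anthropic_headers` react to, a message carrying an explicit cache-control block looks like this (the text content is made up; the block shape follows Anthropic's prompt-caching format used in the code above):

messages = [
    {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": "Large, reusable context goes here...",
                "cache_control": {"type": "ephemeral"},
            }
        ],
    }
]
# is_cache_control_set(messages=messages) -> True, so validate_environment()
# adds "anthropic-beta: prompt-caching-2024-07-31" to the outgoing headers.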
@@ -125,14 +125,22 @@ class AzureOpenAIConfig(BaseConfig):
) -> bool:
"""
- check if api_version is supported for response_format
- returns True if the API version is equal to or newer than the supported version
"""
api_year = int(api_version_year)
api_month = int(api_version_month)
supported_year = int(API_VERSION_YEAR_SUPPORTED_RESPONSE_FORMAT)
supported_month = int(API_VERSION_MONTH_SUPPORTED_RESPONSE_FORMAT)

is_supported = (
int(api_version_year) <= API_VERSION_YEAR_SUPPORTED_RESPONSE_FORMAT
and int(api_version_month) >= API_VERSION_MONTH_SUPPORTED_RESPONSE_FORMAT
)

return is_supported
# If the year is greater than supported year, it's definitely supported
if api_year > supported_year:
return True
# If the year is less than supported year, it's not supported
elif api_year < supported_year:
return False
# If same year, check if month is >= supported month
else:
return api_month >= supported_month

def map_openai_params(
self,

@@ -202,6 +210,7 @@ class AzureOpenAIConfig(BaseConfig):
is_response_format_supported_api_version
and _is_response_format_supported_model
)

optional_params = self._add_response_format_to_tools(
optional_params=optional_params,
value=value,
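A standalone sketch of the corrected comparison, for clarity (the constant values below are assumptions for illustration; the real values come from API_VERSION_YEAR_SUPPORTED_RESPONSE_FORMAT and API_VERSION_MONTH_SUPPORTED_RESPONSE_FORMAT):

SUPPORTED_YEAR = 2024   # assumed value, for illustration only
SUPPORTED_MONTH = 8     # assumed value, for illustration only


def is_response_format_supported(api_version_year: str, api_version_month: str) -> bool:
    year, month = int(api_version_year), int(api_version_month)
    if year != SUPPORTED_YEAR:
        # Any later year is supported; any earlier year is not.
        return year > SUPPORTED_YEAR
    # Same year: compare the month.
    return month >= SUPPORTED_MONTH


assert is_response_format_supported("2025", "01")       # newer year, now accepted
assert not is_response_format_supported("2023", "12")   # older year, still rejected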
@@ -309,21 +309,30 @@ class BaseAzureLLM(BaseOpenAILLM):
azure_ad_token_provider: Optional[Callable[[], str]] = None
# If we have api_key, then we have higher priority
azure_ad_token = litellm_params.get("azure_ad_token")
tenant_id = litellm_params.get("tenant_id")
client_id = litellm_params.get("client_id")
client_secret = litellm_params.get("client_secret")
azure_username = litellm_params.get("azure_username")
azure_password = litellm_params.get("azure_password")
tenant_id = litellm_params.get("tenant_id", os.getenv("AZURE_TENANT_ID"))
client_id = litellm_params.get("client_id", os.getenv("AZURE_CLIENT_ID"))
client_secret = litellm_params.get(
"client_secret", os.getenv("AZURE_CLIENT_SECRET")
)
azure_username = litellm_params.get(
"azure_username", os.getenv("AZURE_USERNAME")
)
azure_password = litellm_params.get(
"azure_password", os.getenv("AZURE_PASSWORD")
)
max_retries = litellm_params.get("max_retries")
timeout = litellm_params.get("timeout")
if not api_key and tenant_id and client_id and client_secret:
verbose_logger.debug("Using Azure AD Token Provider for Azure Auth")
verbose_logger.debug(
"Using Azure AD Token Provider from Entrata ID for Azure Auth"
)
azure_ad_token_provider = get_azure_ad_token_from_entrata_id(
tenant_id=tenant_id,
client_id=client_id,
client_secret=client_secret,
)
if azure_username and azure_password and client_id:
verbose_logger.debug("Using Azure Username and Password for Azure Auth")
azure_ad_token_provider = get_azure_ad_token_from_username_password(
azure_username=azure_username,
azure_password=azure_password,
@@ -331,12 +340,16 @@ class BaseAzureLLM(BaseOpenAILLM):
)

if azure_ad_token is not None and azure_ad_token.startswith("oidc/"):
verbose_logger.debug("Using Azure OIDC Token for Azure Auth")
azure_ad_token = get_azure_ad_token_from_oidc(azure_ad_token)
elif (
not api_key
and azure_ad_token_provider is None
and litellm.enable_azure_ad_token_refresh is True
):
verbose_logger.debug(
"Using Azure AD token provider based on Service Principal with Secret workflow for Azure Auth"
)
try:
azure_ad_token_provider = get_azure_ad_token_provider()
except ValueError:
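A minimal sketch of the credential fallback pattern added above, using a hypothetical `resolve_azure_credentials` helper; explicit `litellm_params` values win, otherwise the matching AZURE_* environment variable is used:

import os

def resolve_azure_credentials(litellm_params: dict) -> dict:
    # Mirrors the litellm_params.get(key, os.getenv(ENV_VAR)) fallbacks in the hunk above.
    return {
        "tenant_id": litellm_params.get("tenant_id", os.getenv("AZURE_TENANT_ID")),
        "client_id": litellm_params.get("client_id", os.getenv("AZURE_CLIENT_ID")),
        "client_secret": litellm_params.get("client_secret", os.getenv("AZURE_CLIENT_SECRET")),
        "azure_username": litellm_params.get("azure_username", os.getenv("AZURE_USERNAME")),
        "azure_password": litellm_params.get("azure_password", os.getenv("AZURE_PASSWORD")),
    }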
@@ -44,6 +44,19 @@ class BaseLLMModelInfo(ABC):
def get_api_base(api_base: Optional[str] = None) -> Optional[str]:
pass

@abstractmethod
def validate_environment(
self,
headers: dict,
model: str,
messages: List[AllMessageValues],
optional_params: dict,
litellm_params: dict,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
) -> dict:
pass

@staticmethod
@abstractmethod
def get_base_model(model: str) -> Optional[str]:
@@ -44,7 +44,18 @@ class AmazonBedrockGlobalConfig:
)

def get_ap_regions(self) -> List[str]:
return ["ap-northeast-1", "ap-northeast-2", "ap-northeast-3", "ap-south-1"]
"""
Source: https://www.aws-services.info/bedrock.html
"""
return [
"ap-northeast-1",  # Asia Pacific (Tokyo)
"ap-northeast-2",  # Asia Pacific (Seoul)
"ap-northeast-3",  # Asia Pacific (Osaka)
"ap-south-1",  # Asia Pacific (Mumbai)
"ap-south-2",  # Asia Pacific (Hyderabad)
"ap-southeast-1",  # Asia Pacific (Singapore)
"ap-southeast-2",  # Asia Pacific (Sydney)
]

def get_sa_regions(self) -> List[str]:
return ["sa-east-1"]

@@ -54,10 +65,14 @@ class AmazonBedrockGlobalConfig:
Source: https://www.aws-services.info/bedrock.html
"""
return [
"eu-west-1",
"eu-west-2",
"eu-west-3",
"eu-central-1",
"eu-west-1",  # Europe (Ireland)
"eu-west-2",  # Europe (London)
"eu-west-3",  # Europe (Paris)
"eu-central-1",  # Europe (Frankfurt)
"eu-central-2",  # Europe (Zurich)
"eu-south-1",  # Europe (Milan)
"eu-south-2",  # Europe (Spain)
"eu-north-1",  # Europe (Stockholm)
]

def get_ca_regions(self) -> List[str]:

@@ -68,11 +83,11 @@ class AmazonBedrockGlobalConfig:
Source: https://www.aws-services.info/bedrock.html
"""
return [
"us-east-2",
"us-east-1",
"us-west-1",
"us-west-2",
"us-gov-west-1",
"us-east-1",  # US East (N. Virginia)
"us-east-2",  # US East (Ohio)
"us-west-2",  # US West (Oregon)
"us-gov-east-1",  # AWS GovCloud (US-East)
"us-gov-west-1",  # AWS GovCloud (US-West)
]
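A rough sketch of how the expanded region lists might be combined to validate an `aws_region_name`; `all_supported_regions` is hypothetical, and it assumes the class also exposes `get_us_regions()`/`get_eu_regions()` getters for the hunks shown above:

def all_supported_regions(cfg) -> list:
    # cfg is an AmazonBedrockGlobalConfig-like object exposing the per-geography getters.
    return (
        cfg.get_us_regions()
        + cfg.get_eu_regions()
        + cfg.get_ap_regions()
        + cfg.get_ca_regions()
        + cfg.get_sa_regions()
    )

# e.g. "eu-central-2" in all_supported_regions(cfg) -> True after this change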
litellm/llms/cohere/chat/v2_transformation.py (new file, 356 lines)
@@ -0,0 +1,356 @@
import time
from typing import TYPE_CHECKING, Any, AsyncIterator, Iterator, List, Optional, Union

import httpx

import litellm
from litellm.litellm_core_utils.prompt_templates.factory import cohere_messages_pt_v2
from litellm.llms.base_llm.chat.transformation import BaseConfig, BaseLLMException
from litellm.types.llms.cohere import CohereV2ChatResponse
from litellm.types.llms.openai import AllMessageValues, ChatCompletionToolCallChunk
from litellm.types.utils import ModelResponse, Usage

from ..common_utils import CohereError
from ..common_utils import ModelResponseIterator as CohereModelResponseIterator
from ..common_utils import validate_environment as cohere_validate_environment

if TYPE_CHECKING:
from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj

LiteLLMLoggingObj = _LiteLLMLoggingObj
else:
LiteLLMLoggingObj = Any


class CohereV2ChatConfig(BaseConfig):
"""
Configuration class for Cohere's API interface.

Args:
preamble (str, optional): When specified, the default Cohere preamble will be replaced with the provided one.
chat_history (List[Dict[str, str]], optional): A list of previous messages between the user and the model.
generation_id (str, optional): Unique identifier for the generated reply.
response_id (str, optional): Unique identifier for the response.
conversation_id (str, optional): An alternative to chat_history, creates or resumes a persisted conversation.
prompt_truncation (str, optional): Dictates how the prompt will be constructed. Options: 'AUTO', 'AUTO_PRESERVE_ORDER', 'OFF'.
connectors (List[Dict[str, str]], optional): List of connectors (e.g., web-search) to enrich the model's reply.
search_queries_only (bool, optional): When true, the response will only contain a list of generated search queries.
documents (List[Dict[str, str]], optional): A list of relevant documents that the model can cite.
temperature (float, optional): A non-negative float that tunes the degree of randomness in generation.
max_tokens (int, optional): The maximum number of tokens the model will generate as part of the response.
k (int, optional): Ensures only the top k most likely tokens are considered for generation at each step.
p (float, optional): Ensures that only the most likely tokens, with total probability mass of p, are considered for generation.
frequency_penalty (float, optional): Used to reduce repetitiveness of generated tokens.
presence_penalty (float, optional): Used to reduce repetitiveness of generated tokens.
tools (List[Dict[str, str]], optional): A list of available tools (functions) that the model may suggest invoking.
tool_results (List[Dict[str, Any]], optional): A list of results from invoking tools.
seed (int, optional): A seed to assist reproducibility of the model's response.
"""

preamble: Optional[str] = None
chat_history: Optional[list] = None
generation_id: Optional[str] = None
response_id: Optional[str] = None
conversation_id: Optional[str] = None
prompt_truncation: Optional[str] = None
connectors: Optional[list] = None
search_queries_only: Optional[bool] = None
documents: Optional[list] = None
temperature: Optional[int] = None
max_tokens: Optional[int] = None
k: Optional[int] = None
p: Optional[int] = None
frequency_penalty: Optional[int] = None
presence_penalty: Optional[int] = None
tools: Optional[list] = None
tool_results: Optional[list] = None
seed: Optional[int] = None

def __init__(
self,
preamble: Optional[str] = None,
chat_history: Optional[list] = None,
generation_id: Optional[str] = None,
response_id: Optional[str] = None,
conversation_id: Optional[str] = None,
prompt_truncation: Optional[str] = None,
connectors: Optional[list] = None,
search_queries_only: Optional[bool] = None,
documents: Optional[list] = None,
temperature: Optional[int] = None,
max_tokens: Optional[int] = None,
k: Optional[int] = None,
p: Optional[int] = None,
frequency_penalty: Optional[int] = None,
presence_penalty: Optional[int] = None,
tools: Optional[list] = None,
tool_results: Optional[list] = None,
seed: Optional[int] = None,
) -> None:
locals_ = locals()
for key, value in locals_.items():
if key != "self" and value is not None:
setattr(self.__class__, key, value)

def validate_environment(
self,
headers: dict,
model: str,
messages: List[AllMessageValues],
optional_params: dict,
litellm_params: dict,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
) -> dict:
return cohere_validate_environment(
headers=headers,
model=model,
messages=messages,
optional_params=optional_params,
api_key=api_key,
)

def get_supported_openai_params(self, model: str) -> List[str]:
return [
"stream",
"temperature",
"max_tokens",
"top_p",
"frequency_penalty",
"presence_penalty",
"stop",
"n",
"tools",
"tool_choice",
"seed",
"extra_headers",
]

def map_openai_params(
self,
non_default_params: dict,
optional_params: dict,
model: str,
drop_params: bool,
) -> dict:
for param, value in non_default_params.items():
if param == "stream":
optional_params["stream"] = value
if param == "temperature":
optional_params["temperature"] = value
if param == "max_tokens":
optional_params["max_tokens"] = value
if param == "n":
optional_params["num_generations"] = value
if param == "top_p":
optional_params["p"] = value
if param == "frequency_penalty":
optional_params["frequency_penalty"] = value
if param == "presence_penalty":
optional_params["presence_penalty"] = value
if param == "stop":
optional_params["stop_sequences"] = value
if param == "tools":
optional_params["tools"] = value
if param == "seed":
optional_params["seed"] = value
return optional_params

def transform_request(
self,
model: str,
messages: List[AllMessageValues],
optional_params: dict,
litellm_params: dict,
headers: dict,
) -> dict:
## Load Config
for k, v in litellm.CohereChatConfig.get_config().items():
if (
k not in optional_params
):  # completion(top_k=3) > cohere_config(top_k=3) <- allows for dynamic variables to be passed in
optional_params[k] = v

most_recent_message, chat_history = cohere_messages_pt_v2(
messages=messages, model=model, llm_provider="cohere_chat"
)

## Handle Tool Calling
if "tools" in optional_params:
_is_function_call = True
cohere_tools = self._construct_cohere_tool(tools=optional_params["tools"])
optional_params["tools"] = cohere_tools
if isinstance(most_recent_message, dict):
optional_params["tool_results"] = [most_recent_message]
elif isinstance(most_recent_message, str):
optional_params["message"] = most_recent_message

## check if chat history message is 'user' and 'tool_results' is given -> force_single_step=True, else cohere api fails
if len(chat_history) > 0 and chat_history[-1]["role"] == "USER":
optional_params["force_single_step"] = True

return optional_params

def transform_response(
self,
model: str,
raw_response: httpx.Response,
model_response: ModelResponse,
logging_obj: LiteLLMLoggingObj,
request_data: dict,
messages: List[AllMessageValues],
optional_params: dict,
litellm_params: dict,
encoding: Any,
api_key: Optional[str] = None,
json_mode: Optional[bool] = None,
) -> ModelResponse:
try:
raw_response_json = raw_response.json()
except Exception:
raise CohereError(
message=raw_response.text, status_code=raw_response.status_code
)

try:
cohere_v2_chat_response = CohereV2ChatResponse(**raw_response_json)  # type: ignore
except Exception:
raise CohereError(message=raw_response.text, status_code=422)

cohere_content = cohere_v2_chat_response["message"].get("content", None)
if cohere_content is not None:
model_response.choices[0].message.content = "".join(  # type: ignore
[
content.get("text", "")
for content in cohere_content
if content is not None
]
)

## ADD CITATIONS
if "citations" in cohere_v2_chat_response:
setattr(model_response, "citations", cohere_v2_chat_response["citations"])

## Tool calling response
cohere_tools_response = cohere_v2_chat_response["message"].get("tool_calls", [])
if cohere_tools_response is not None and cohere_tools_response != []:
# convert cohere_tools_response to OpenAI response format
tool_calls: List[ChatCompletionToolCallChunk] = []
for index, tool in enumerate(cohere_tools_response):
tool_call: ChatCompletionToolCallChunk = {
**tool,  # type: ignore
"index": index,
}
tool_calls.append(tool_call)
_message = litellm.Message(
tool_calls=tool_calls,
content=None,
)
model_response.choices[0].message = _message  # type: ignore

## CALCULATING USAGE - use cohere `billed_units` for returning usage
token_usage = cohere_v2_chat_response["usage"].get("tokens", {})
prompt_tokens = token_usage.get("input_tokens", 0)
completion_tokens = token_usage.get("output_tokens", 0)

model_response.created = int(time.time())
model_response.model = model
usage = Usage(
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
total_tokens=prompt_tokens + completion_tokens,
)
setattr(model_response, "usage", usage)
return model_response

def _construct_cohere_tool(
self,
tools: Optional[list] = None,
):
if tools is None:
tools = []
cohere_tools = []
for tool in tools:
cohere_tool = self._translate_openai_tool_to_cohere(tool)
cohere_tools.append(cohere_tool)
return cohere_tools

def _translate_openai_tool_to_cohere(
self,
openai_tool: dict,
):
# cohere tools look like this
"""
{
"name": "query_daily_sales_report",
"description": "Connects to a database to retrieve overall sales volumes and sales information for a given day.",
"parameter_definitions": {
"day": {
"description": "Retrieves sales data for this day, formatted as YYYY-MM-DD.",
"type": "str",
"required": True
}
}
}
"""

# OpenAI tools look like this
"""
{
"type": "function",
"function": {
"name": "get_current_weather",
"description": "Get the current weather in a given location",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state, e.g. San Francisco, CA",
},
"unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
},
"required": ["location"],
},
},
}
"""
cohere_tool = {
"name": openai_tool["function"]["name"],
"description": openai_tool["function"]["description"],
"parameter_definitions": {},
}

for param_name, param_def in openai_tool["function"]["parameters"][
"properties"
].items():
required_params = (
openai_tool.get("function", {})
.get("parameters", {})
.get("required", [])
)
cohere_param_def = {
"description": param_def.get("description", ""),
"type": param_def.get("type", ""),
"required": param_name in required_params,
}
cohere_tool["parameter_definitions"][param_name] = cohere_param_def

return cohere_tool

def get_model_response_iterator(
self,
streaming_response: Union[Iterator[str], AsyncIterator[str], ModelResponse],
sync_stream: bool,
json_mode: Optional[bool] = False,
):
return CohereModelResponseIterator(
streaming_response=streaming_response,
sync_stream=sync_stream,
json_mode=json_mode,
)

def get_error_class(
self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
) -> BaseLLMException:
return CohereError(status_code=status_code, message=error_message)
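A minimal standalone sketch of the OpenAI-to-Cohere tool translation that `_translate_openai_tool_to_cohere` performs, using the weather tool from the docstring above; `translate_tool` is an illustrative name, not part of this file:

def translate_tool(openai_tool: dict) -> dict:
    # OpenAI "function" schema -> Cohere "parameter_definitions", mirroring the mapping above.
    fn = openai_tool["function"]
    required = fn.get("parameters", {}).get("required", [])
    return {
        "name": fn["name"],
        "description": fn["description"],
        "parameter_definitions": {
            name: {
                "description": prop.get("description", ""),
                "type": prop.get("type", ""),
                "required": name in required,
            }
            for name, prop in fn["parameters"]["properties"].items()
        },
    }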
@@ -104,19 +104,28 @@ class ModelResponseIterator:
raise RuntimeError(f"Error receiving chunk from stream: {e}")

try:
str_line = chunk
if isinstance(chunk, bytes):  # Handle binary data
str_line = chunk.decode("utf-8")  # Convert bytes to string
index = str_line.find("data:")
if index != -1:
str_line = str_line[index:]
data_json = json.loads(str_line)
return self.chunk_parser(chunk=data_json)
return self.convert_str_chunk_to_generic_chunk(chunk=chunk)
except StopIteration:
raise StopIteration
except ValueError as e:
raise RuntimeError(f"Error parsing chunk: {e},\nReceived chunk: {chunk}")

def convert_str_chunk_to_generic_chunk(self, chunk: str) -> GenericStreamingChunk:
"""
Convert a string chunk to a GenericStreamingChunk

Note: This is used for Cohere pass through streaming logging
"""
str_line = chunk
if isinstance(chunk, bytes):  # Handle binary data
str_line = chunk.decode("utf-8")  # Convert bytes to string
index = str_line.find("data:")
if index != -1:
str_line = str_line[index:]

data_json = json.loads(str_line)
return self.chunk_parser(chunk=data_json)

# Async iterator
def __aiter__(self):
self.async_response_iterator = self.streaming_response.__aiter__()
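A simplified sketch of the chunk normalization that `convert_str_chunk_to_generic_chunk` applies (decode bytes, locate the `data:` marker, parse JSON); it strips the `data:` prefix for clarity and is not the exact upstream implementation:

import json

def parse_sse_chunk(chunk) -> dict:
    # Decode bytes if needed, drop everything up to and including the "data:" marker,
    # then parse the remaining JSON payload.
    line = chunk.decode("utf-8") if isinstance(chunk, bytes) else chunk
    idx = line.find("data:")
    if idx != -1:
        line = line[idx + len("data:"):]
    return json.loads(line)

# e.g. parse_sse_chunk(b'data: {"text": "hi"}') -> {'text': 'hi'}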
@@ -131,15 +140,7 @@ class ModelResponseIterator:
raise RuntimeError(f"Error receiving chunk from stream: {e}")

try:
str_line = chunk
if isinstance(chunk, bytes):  # Handle binary data
str_line = chunk.decode("utf-8")  # Convert bytes to string
index = str_line.find("data:")
if index != -1:
str_line = str_line[index:]

data_json = json.loads(str_line)
return self.chunk_parser(chunk=data_json)
return self.convert_str_chunk_to_generic_chunk(chunk=chunk)
except StopAsyncIteration:
raise StopAsyncIteration
except ValueError as e:
@@ -389,7 +389,7 @@ class OpenAIGPTConfig(BaseLLMModelInfo, BaseConfig):
)

@staticmethod
def get_base_model(model: str) -> str:
def get_base_model(model: Optional[str] = None) -> Optional[str]:
return model

def get_model_response_iterator(
@@ -1,6 +1,7 @@
from typing import List, Optional

from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import AllMessageValues

from ..base_llm.base_utils import BaseLLMModelInfo
from ..base_llm.chat.transformation import BaseLLMException
@@ -11,6 +12,26 @@ class TopazException(BaseLLMException):


class TopazModelInfo(BaseLLMModelInfo):
def validate_environment(
self,
headers: dict,
model: str,
messages: List[AllMessageValues],
optional_params: dict,
litellm_params: dict,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
) -> dict:
if api_key is None:
raise ValueError(
"API key is required for Topaz image variations. Set via `TOPAZ_API_KEY` or `api_key=..`"
)
return {
# "Content-Type": "multipart/form-data",
"Accept": "image/jpeg",
"X-API-Key": api_key,
}

def get_models(
self, api_key: Optional[str] = None, api_base: Optional[str] = None
) -> List[str]:
@@ -10,10 +10,7 @@ from litellm.llms.base_llm.chat.transformation import (
BaseLLMException,
LiteLLMLoggingObj,
)
from litellm.types.llms.openai import (
AllMessageValues,
OpenAIImageVariationOptionalParams,
)
from litellm.types.llms.openai import OpenAIImageVariationOptionalParams
from litellm.types.utils import (
FileTypes,
HttpHandlerRequestFields,
@@ -22,35 +19,15 @@ from litellm.types.utils import (
)

from ...base_llm.image_variations.transformation import BaseImageVariationConfig
from ..common_utils import TopazException
from ..common_utils import TopazException, TopazModelInfo


class TopazImageVariationConfig(BaseImageVariationConfig):
class TopazImageVariationConfig(TopazModelInfo, BaseImageVariationConfig):
def get_supported_openai_params(
self, model: str
) -> List[OpenAIImageVariationOptionalParams]:
return ["response_format", "size"]

def validate_environment(
self,
headers: dict,
model: str,
messages: List[AllMessageValues],
optional_params: dict,
litellm_params: dict,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
) -> dict:
if api_key is None:
raise ValueError(
"API key is required for Topaz image variations. Set via `TOPAZ_API_KEY` or `api_key=..`"
)
return {
# "Content-Type": "multipart/form-data",
"Accept": "image/jpeg",
"X-API-Key": api_key,
}

def get_complete_url(
self,
api_base: Optional[str],
litellm/llms/vllm/common_utils.py (new file, 75 lines)
@@ -0,0 +1,75 @@
from typing import List, Optional, Union

import httpx

import litellm
from litellm.llms.base_llm.base_utils import BaseLLMModelInfo
from litellm.llms.base_llm.chat.transformation import BaseLLMException
from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import AllMessageValues
from litellm.utils import _add_path_to_api_base


class VLLMError(BaseLLMException):
pass


class VLLMModelInfo(BaseLLMModelInfo):
def validate_environment(
self,
headers: dict,
model: str,
messages: List[AllMessageValues],
optional_params: dict,
litellm_params: dict,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
) -> dict:
"""Google AI Studio sends api key in query params"""
return headers

@staticmethod
def get_api_base(api_base: Optional[str] = None) -> Optional[str]:
api_base = api_base or get_secret_str("VLLM_API_BASE")
if api_base is None:
raise ValueError(
"VLLM_API_BASE is not set. Please set the environment variable, to use VLLM's pass-through - `{LITELLM_API_BASE}/vllm/{endpoint}`."
)
return api_base

@staticmethod
def get_api_key(api_key: Optional[str] = None) -> Optional[str]:
return None

@staticmethod
def get_base_model(model: str) -> Optional[str]:
return model

def get_models(
self, api_key: Optional[str] = None, api_base: Optional[str] = None
) -> List[str]:
api_base = VLLMModelInfo.get_api_base(api_base)
api_key = VLLMModelInfo.get_api_key(api_key)
endpoint = "/v1/models"
if api_base is None or api_key is None:
raise ValueError(
"GEMINI_API_BASE or GEMINI_API_KEY is not set. Please set the environment variable, to query Gemini's `/models` endpoint."
)

url = _add_path_to_api_base(api_base, endpoint)
response = litellm.module_level_client.get(
url=url,
)

response.raise_for_status()

models = response.json()["data"]

return [model["id"] for model in models]

def get_error_class(
self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
) -> BaseLLMException:
return VLLMError(
status_code=status_code, message=error_message, headers=headers
)
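A standalone sketch (not the litellm implementation) of what `get_models` does against a vLLM server's OpenAI-compatible endpoint; the localhost default is an assumption:

import os
import httpx

# Resolve the vLLM base URL the same way get_api_base does, then list model ids
# from the OpenAI-compatible /v1/models endpoint.
api_base = os.getenv("VLLM_API_BASE", "http://localhost:8000")
resp = httpx.get(f"{api_base.rstrip('/')}/v1/models")
resp.raise_for_status()
model_ids = [m["id"] for m in resp.json()["data"]]
print(model_ids)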
@@ -5,9 +5,29 @@ import httpx
import litellm
from litellm.llms.base_llm.base_utils import BaseLLMModelInfo
from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import AllMessageValues


class XAIModelInfo(BaseLLMModelInfo):
def validate_environment(
self,
headers: dict,
model: str,
messages: List[AllMessageValues],
optional_params: dict,
litellm_params: dict,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
) -> dict:
if api_key is not None:
headers["Authorization"] = f"Bearer {api_key}"

# Ensure Content-Type is set to application/json
if "content-type" not in headers and "Content-Type" not in headers:
headers["Content-Type"] = "application/json"

return headers

@staticmethod
def get_api_base(api_base: Optional[str] = None) -> Optional[str]:
return api_base or get_secret_str("XAI_API_BASE") or "https://api.x.ai"
@@ -954,7 +954,11 @@ def completion( # type: ignore # noqa: PLR0915
non_default_params = get_non_default_completion_params(kwargs=kwargs)
litellm_params = {}  # used to prevent unbound var errors
## PROMPT MANAGEMENT HOOKS ##
if isinstance(litellm_logging_obj, LiteLLMLoggingObj) and prompt_id is not None:
if isinstance(litellm_logging_obj, LiteLLMLoggingObj) and (
litellm_logging_obj.should_run_prompt_management_hooks(
prompt_id=prompt_id, non_default_params=non_default_params
)
):
(
model,
messages,
@@ -2654,9 +2658,9 @@ def completion( # type: ignore # noqa: PLR0915
"aws_region_name" not in optional_params
or optional_params["aws_region_name"] is None
):
optional_params[
"aws_region_name"
] = aws_bedrock_client.meta.region_name
optional_params["aws_region_name"] = (
aws_bedrock_client.meta.region_name
)

bedrock_route = BedrockModelInfo.get_bedrock_route(model)
if bedrock_route == "converse":
@@ -4363,9 +4367,9 @@ def adapter_completion(
new_kwargs = translation_obj.translate_completion_input_params(kwargs=kwargs)

response: Union[ModelResponse, CustomStreamWrapper] = completion(**new_kwargs)  # type: ignore
translated_response: Optional[
Union[BaseModel, AdapterCompletionStreamWrapper]
] = None
translated_response: Optional[Union[BaseModel, AdapterCompletionStreamWrapper]] = (
None
)
if isinstance(response, ModelResponse):
translated_response = translation_obj.translate_completion_output_params(
response=response
@@ -5785,9 +5789,9 @@ def stream_chunk_builder( # noqa: PLR0915
]

if len(content_chunks) > 0:
response["choices"][0]["message"][
"content"
] = processor.get_combined_content(content_chunks)
response["choices"][0]["message"]["content"] = (
processor.get_combined_content(content_chunks)
)

reasoning_chunks = [
chunk
@@ -5798,9 +5802,9 @@ def stream_chunk_builder( # noqa: PLR0915
]

if len(reasoning_chunks) > 0:
response["choices"][0]["message"][
"reasoning_content"
] = processor.get_combined_reasoning_content(reasoning_chunks)
response["choices"][0]["message"]["reasoning_content"] = (
processor.get_combined_reasoning_content(reasoning_chunks)
)

audio_chunks = [
chunk
@@ -2669,7 +2669,7 @@
"max_output_tokens": 4096,
"input_cost_per_token": 0.00000008,
"input_cost_per_audio_token": 0.000004,
"output_cost_per_token": 0.00032,
"output_cost_per_token": 0.00000032,
"litellm_provider": "azure_ai",
"mode": "chat",
"supports_audio_input": true,
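The hunk above corrects `output_cost_per_token` from 0.00032 to 0.00000032; as a quick sanity check of the magnitude (illustrative arithmetic only):

# 1,000 output tokens at the corrected per-token price cost $0.00032, i.e. $0.32 per
# million tokens; the old value would have billed $0.32 per thousand tokens.
output_tokens = 1_000
print(output_tokens * 0.00000032)  # 0.00032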
File diff suppressed because one or more lines are too long

@@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75a5453f51d60261.js"/><script src="/ui/_next/static/chunks/fd9d1056-524b80e1a6b8bb06.js" async=""></script><script src="/ui/_next/static/chunks/117-87ec698bfca6820e.js" async=""></script><script src="/ui/_next/static/chunks/main-app-475d6efe4080647d.js" async=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-42372ed130431b0a.js" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75a5453f51d60261.js" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/a34f9d1faa5f3315-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"style\"]\n3:HL[\"/ui/_next/static/css/61b39adb1cb75213.css\",\"style\"]\n"])</script><script>self.__next_f.push([1,"4:I[12846,[],\"\"]\n6:I[19107,[],\"ClientPageRoot\"]\n7:I[98621,[\"665\",\"static/chunks/3014691f-0b72c78cfebbd712.js\",\"990\",\"static/chunks/13b76428-ebdf3012af0e4489.js\",\"42\",\"static/chunks/42-6810261f4d6c8bbf.js\",\"261\",\"static/chunks/261-d4b99bc9f53d4ef3.js\",\"899\",\"static/chunks/899-8d832fe7c09b2afe.js\",\"817\",\"static/chunks/817-c8c94d51d28af935.js\",\"250\",\"static/chunks/250-f21e8c1de1717077.js\",\"699\",\"static/chunks/699-b82671c2a3a100c7.js\",\"931\",\"static/chunks/app/page-0f39fb1d0bde3c55.js\"],\"default\",1]\n8:I[4707,[],\"\"]\n9:I[36423,[],\"\"]\nb:I[61060,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"0:[\"$\",\"$L4\",null,{\"buildId\":\"eKo9wJb6ZFjAy85GaOeQ6\",\"assetPrefix\":\"/ui\",\"urlParts\":[\"\",\"\"],\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[[\"$L5\",[\"$\",\"$L6\",null,{\"props\":{\"params\":{},\"searchParams\":{}},\"Component\":\"$7\"}],null],null],null]},[[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}],[\"$\",\"link\",\"1\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/61b39adb1cb75213.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}]],[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_cf7686\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI 
Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[]}]}]}]],null],null],\"couldBeIntercepted\":false,\"initialHead\":[null,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script></body></html>
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75a5453f51d60261.js"/><script src="/ui/_next/static/chunks/fd9d1056-524b80e1a6b8bb06.js" async=""></script><script src="/ui/_next/static/chunks/117-87ec698bfca6820e.js" async=""></script><script src="/ui/_next/static/chunks/main-app-475d6efe4080647d.js" async=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-42372ed130431b0a.js" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75a5453f51d60261.js" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/a34f9d1faa5f3315-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"style\"]\n3:HL[\"/ui/_next/static/css/61b39adb1cb75213.css\",\"style\"]\n"])</script><script>self.__next_f.push([1,"4:I[12846,[],\"\"]\n6:I[19107,[],\"ClientPageRoot\"]\n7:I[37185,[\"665\",\"static/chunks/3014691f-0b72c78cfebbd712.js\",\"990\",\"static/chunks/13b76428-ebdf3012af0e4489.js\",\"42\",\"static/chunks/42-6810261f4d6c8bbf.js\",\"261\",\"static/chunks/261-d4b99bc9f53d4ef3.js\",\"899\",\"static/chunks/899-8d832fe7c09b2afe.js\",\"817\",\"static/chunks/817-c8c94d51d28af935.js\",\"250\",\"static/chunks/250-ef859ae76fcc7788.js\",\"699\",\"static/chunks/699-b82671c2a3a100c7.js\",\"931\",\"static/chunks/app/page-0f22a8aad3780430.js\"],\"default\",1]\n8:I[4707,[],\"\"]\n9:I[36423,[],\"\"]\nb:I[61060,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"0:[\"$\",\"$L4\",null,{\"buildId\":\"cxKWUnb1loD53gKRUqiFm\",\"assetPrefix\":\"/ui\",\"urlParts\":[\"\",\"\"],\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[[\"$L5\",[\"$\",\"$L6\",null,{\"props\":{\"params\":{},\"searchParams\":{}},\"Component\":\"$7\"}],null],null],null]},[[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}],[\"$\",\"link\",\"1\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/61b39adb1cb75213.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}]],[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_cf7686\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI 
Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[]}]}]}]],null],null],\"couldBeIntercepted\":false,\"initialHead\":[null,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script></body></html>

@@ -1,7 +1,7 @@
2:I[19107,[],"ClientPageRoot"]
3:I[98621,["665","static/chunks/3014691f-0b72c78cfebbd712.js","990","static/chunks/13b76428-ebdf3012af0e4489.js","42","static/chunks/42-6810261f4d6c8bbf.js","261","static/chunks/261-d4b99bc9f53d4ef3.js","899","static/chunks/899-8d832fe7c09b2afe.js","817","static/chunks/817-c8c94d51d28af935.js","250","static/chunks/250-f21e8c1de1717077.js","699","static/chunks/699-b82671c2a3a100c7.js","931","static/chunks/app/page-0f39fb1d0bde3c55.js"],"default",1]
3:I[37185,["665","static/chunks/3014691f-0b72c78cfebbd712.js","990","static/chunks/13b76428-ebdf3012af0e4489.js","42","static/chunks/42-6810261f4d6c8bbf.js","261","static/chunks/261-d4b99bc9f53d4ef3.js","899","static/chunks/899-8d832fe7c09b2afe.js","817","static/chunks/817-c8c94d51d28af935.js","250","static/chunks/250-ef859ae76fcc7788.js","699","static/chunks/699-b82671c2a3a100c7.js","931","static/chunks/app/page-0f22a8aad3780430.js"],"default",1]
4:I[4707,[],""]
5:I[36423,[],""]
0:["eKo9wJb6ZFjAy85GaOeQ6",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/61b39adb1cb75213.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
0:["cxKWUnb1loD53gKRUqiFm",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/61b39adb1cb75213.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

@@ -1,7 +1,7 @@
2:I[19107,[],"ClientPageRoot"]
3:I[52829,["42","static/chunks/42-6810261f4d6c8bbf.js","261","static/chunks/261-d4b99bc9f53d4ef3.js","250","static/chunks/250-f21e8c1de1717077.js","699","static/chunks/699-b82671c2a3a100c7.js","418","static/chunks/app/model_hub/page-068a441595bd0fc3.js"],"default",1]
3:I[52829,["42","static/chunks/42-6810261f4d6c8bbf.js","261","static/chunks/261-d4b99bc9f53d4ef3.js","250","static/chunks/250-ef859ae76fcc7788.js","699","static/chunks/699-b82671c2a3a100c7.js","418","static/chunks/app/model_hub/page-068a441595bd0fc3.js"],"default",1]
4:I[4707,[],""]
5:I[36423,[],""]
0:["eKo9wJb6ZFjAy85GaOeQ6",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/61b39adb1cb75213.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
0:["cxKWUnb1loD53gKRUqiFm",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/61b39adb1cb75213.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

File diff suppressed because one or more lines are too long

@@ -1,7 +1,7 @@
2:I[19107,[],"ClientPageRoot"]
3:I[12011,["665","static/chunks/3014691f-0b72c78cfebbd712.js","42","static/chunks/42-6810261f4d6c8bbf.js","899","static/chunks/899-8d832fe7c09b2afe.js","250","static/chunks/250-f21e8c1de1717077.js","461","static/chunks/app/onboarding/page-957334f0c5d5df41.js"],"default",1]
3:I[12011,["665","static/chunks/3014691f-0b72c78cfebbd712.js","42","static/chunks/42-6810261f4d6c8bbf.js","899","static/chunks/899-8d832fe7c09b2afe.js","250","static/chunks/250-ef859ae76fcc7788.js","461","static/chunks/app/onboarding/page-957334f0c5d5df41.js"],"default",1]
4:I[4707,[],""]
5:I[36423,[],""]
0:["eKo9wJb6ZFjAy85GaOeQ6",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/61b39adb1cb75213.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
0:["cxKWUnb1loD53gKRUqiFm",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/61b39adb1cb75213.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null
@@ -33,6 +33,7 @@ model_list:
litellm_settings:
  num_retries: 0
  callbacks: ["prometheus"]
  check_provider_endpoint: true

files_settings:
  - custom_llm_provider: gemini
@@ -317,6 +317,8 @@ class LiteLLMRoutes(enum.Enum):
        "/openai",
        "/assemblyai",
        "/eu.assemblyai",
        "/vllm",
        "/mistral",
    ]

    anthropic_routes = [
@@ -1,11 +1,12 @@
# What is this?
## Common checks for /v1/models and `/model/info`
import copy
from typing import Dict, List, Optional, Set

import litellm
from litellm._logging import verbose_proxy_logger
from litellm.proxy._types import SpecialModelNames, UserAPIKeyAuth
from litellm.router import Router
from litellm.types.router import LiteLLM_Params
from litellm.utils import get_valid_models


@@ -23,15 +24,20 @@ def _check_wildcard_routing(model: str) -> bool:
    return False


def get_provider_models(provider: str) -> Optional[List[str]]:
def get_provider_models(
    provider: str, litellm_params: Optional[LiteLLM_Params] = None
) -> Optional[List[str]]:
    """
    Returns the list of known models by provider
    """
    if provider == "*":
        return get_valid_models()
        return get_valid_models(litellm_params=litellm_params)

    if provider in litellm.models_by_provider:
        provider_models = copy.deepcopy(litellm.models_by_provider[provider])
        provider_models = get_valid_models(
            custom_llm_provider=provider, litellm_params=litellm_params
        )
        # provider_models = copy.deepcopy(litellm.models_by_provider[provider])
        for idx, _model in enumerate(provider_models):
            if provider not in _model:
                provider_models[idx] = f"{provider}/{_model}"
@@ -118,6 +124,7 @@ def get_complete_model_list(
    user_model: Optional[str],
    infer_model_from_keys: Optional[bool],
    return_wildcard_routes: Optional[bool] = False,
    llm_router: Optional[Router] = None,
) -> List[str]:
    """Logic for returning complete model list for a given key + team pair"""
@@ -143,19 +150,25 @@ def get_complete_model_list(
    unique_models.update(valid_models)

    all_wildcard_models = _get_wildcard_models(
        unique_models=unique_models, return_wildcard_routes=return_wildcard_routes
        unique_models=unique_models,
        return_wildcard_routes=return_wildcard_routes,
        llm_router=llm_router,
    )

    return list(unique_models) + all_wildcard_models


def get_known_models_from_wildcard(wildcard_model: str) -> List[str]:
def get_known_models_from_wildcard(
    wildcard_model: str, litellm_params: Optional[LiteLLM_Params] = None
) -> List[str]:
    try:
        provider, model = wildcard_model.split("/", 1)
    except ValueError:  # safely fail
        return []
    # get all known provider models
    wildcard_models = get_provider_models(provider=provider)
    wildcard_models = get_provider_models(
        provider=provider, litellm_params=litellm_params
    )
    if wildcard_models is None:
        return []
    if model == "*":
@@ -172,7 +185,9 @@ def get_known_models_from_wildcard(wildcard_model: str) -> List[str]:


def _get_wildcard_models(
    unique_models: Set[str], return_wildcard_routes: Optional[bool] = False
    unique_models: Set[str],
    return_wildcard_routes: Optional[bool] = False,
    llm_router: Optional[Router] = None,
) -> List[str]:
    models_to_remove = set()
    all_wildcard_models = []

@@ -183,12 +198,25 @@ def _get_wildcard_models(
        ):  # will add the wildcard route to the list eg: anthropic/*.
            all_wildcard_models.append(model)

            # get all known provider models
            wildcard_models = get_known_models_from_wildcard(wildcard_model=model)
            ## get litellm params from model
            if llm_router is not None:
                model_list = llm_router.get_model_list(model_name=model)
                if model_list is not None:
                    for router_model in model_list:
                        wildcard_models = get_known_models_from_wildcard(
                            wildcard_model=model,
                            litellm_params=LiteLLM_Params(
                                **router_model["litellm_params"]  # type: ignore
                            ),
                        )
                        all_openai_chunks = None  # noqa: F841
                        all_wildcard_models.extend(wildcard_models)
            else:
                # get all known provider models
                wildcard_models = get_known_models_from_wildcard(wildcard_model=model)

                if wildcard_models is not None:
                    models_to_remove.add(model)
                    all_wildcard_models.extend(wildcard_models)
            if wildcard_models is not None:
                models_to_remove.add(model)
                all_wildcard_models.extend(wildcard_models)

    for model in models_to_remove:
        unique_models.remove(model)
@@ -12,7 +12,7 @@ class X42PromptManagement(CustomPromptManagement):
        model: str,
        messages: List[AllMessageValues],
        non_default_params: dict,
        prompt_id: str,
        prompt_id: Optional[str],
        prompt_variables: Optional[dict],
        dynamic_callback_params: StandardCallbackDynamicParams,
    ) -> Tuple[str, List[AllMessageValues], dict]:
@@ -6,6 +6,7 @@ Provider-specific Pass-Through Endpoints
Use litellm with Anthropic SDK, Vertex AI SDK, Cohere SDK, etc.
"""

import os
from typing import Optional

import httpx
@ -43,6 +44,84 @@ def create_request_copy(request: Request):
|
|||
}
|
||||
|
||||
|
||||
async def llm_passthrough_factory_proxy_route(
|
||||
custom_llm_provider: str,
|
||||
endpoint: str,
|
||||
request: Request,
|
||||
fastapi_response: Response,
|
||||
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
|
||||
):
|
||||
"""
|
||||
Factory function for creating pass-through endpoints for LLM providers.
|
||||
"""
|
||||
from litellm.types.utils import LlmProviders
|
||||
from litellm.utils import ProviderConfigManager
|
||||
|
||||
provider_config = ProviderConfigManager.get_provider_model_info(
|
||||
provider=LlmProviders(custom_llm_provider),
|
||||
model=None,
|
||||
)
|
||||
if provider_config is None:
|
||||
raise HTTPException(
|
||||
status_code=404, detail=f"Provider {custom_llm_provider} not found"
|
||||
)
|
||||
base_target_url = provider_config.get_api_base()
|
||||
|
||||
if base_target_url is None:
|
||||
raise HTTPException(
|
||||
status_code=404, detail=f"Provider {custom_llm_provider} api base not found"
|
||||
)
|
||||
|
||||
encoded_endpoint = httpx.URL(endpoint).path
|
||||
|
||||
# Ensure endpoint starts with '/' for proper URL construction
|
||||
if not encoded_endpoint.startswith("/"):
|
||||
encoded_endpoint = "/" + encoded_endpoint
|
||||
|
||||
# Construct the full target URL using httpx
|
||||
base_url = httpx.URL(base_target_url)
|
||||
updated_url = base_url.copy_with(path=encoded_endpoint)
|
||||
|
||||
# Add or update query parameters
|
||||
provider_api_key = passthrough_endpoint_router.get_credentials(
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
region_name=None,
|
||||
)
|
||||
|
||||
auth_headers = provider_config.validate_environment(
|
||||
headers={},
|
||||
model="",
|
||||
messages=[],
|
||||
optional_params={},
|
||||
litellm_params={},
|
||||
api_key=provider_api_key,
|
||||
api_base=base_target_url,
|
||||
)
|
||||
|
||||
## check for streaming
|
||||
is_streaming_request = False
|
||||
# anthropic is streaming when 'stream' = True is in the body
|
||||
if request.method == "POST":
|
||||
_request_body = await request.json()
|
||||
if _request_body.get("stream"):
|
||||
is_streaming_request = True
|
||||
|
||||
## CREATE PASS-THROUGH
|
||||
endpoint_func = create_pass_through_route(
|
||||
endpoint=endpoint,
|
||||
target=str(updated_url),
|
||||
custom_headers=auth_headers,
|
||||
) # dynamically construct pass-through endpoint based on incoming path
|
||||
received_value = await endpoint_func(
|
||||
request,
|
||||
fastapi_response,
|
||||
user_api_key_dict,
|
||||
stream=is_streaming_request, # type: ignore
|
||||
)
|
||||
|
||||
return received_value
|
||||
|
||||
|
||||
@router.api_route(
|
||||
"/gemini/{endpoint:path}",
|
||||
methods=["GET", "POST", "PUT", "DELETE", "PATCH"],
|
||||
|
@ -162,6 +241,84 @@ async def cohere_proxy_route(
|
|||
return received_value
|
||||
|
||||
|
||||
@router.api_route(
|
||||
"/vllm/{endpoint:path}",
|
||||
methods=["GET", "POST", "PUT", "DELETE", "PATCH"],
|
||||
tags=["VLLM Pass-through", "pass-through"],
|
||||
)
|
||||
async def vllm_proxy_route(
|
||||
endpoint: str,
|
||||
request: Request,
|
||||
fastapi_response: Response,
|
||||
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
|
||||
):
|
||||
"""
|
||||
[Docs](https://docs.litellm.ai/docs/pass_through/vllm)
|
||||
"""
|
||||
return await llm_passthrough_factory_proxy_route(
|
||||
endpoint=endpoint,
|
||||
request=request,
|
||||
fastapi_response=fastapi_response,
|
||||
user_api_key_dict=user_api_key_dict,
|
||||
custom_llm_provider="vllm",
|
||||
)
|
||||
|
||||
|
||||
@router.api_route(
|
||||
"/mistral/{endpoint:path}",
|
||||
methods=["GET", "POST", "PUT", "DELETE", "PATCH"],
|
||||
tags=["Mistral Pass-through", "pass-through"],
|
||||
)
|
||||
async def mistral_proxy_route(
|
||||
endpoint: str,
|
||||
request: Request,
|
||||
fastapi_response: Response,
|
||||
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
|
||||
):
|
||||
"""
|
||||
[Docs](https://docs.litellm.ai/docs/anthropic_completion)
|
||||
"""
|
||||
base_target_url = os.getenv("MISTRAL_API_BASE") or "https://api.mistral.ai"
|
||||
encoded_endpoint = httpx.URL(endpoint).path
|
||||
|
||||
# Ensure endpoint starts with '/' for proper URL construction
|
||||
if not encoded_endpoint.startswith("/"):
|
||||
encoded_endpoint = "/" + encoded_endpoint
|
||||
|
||||
# Construct the full target URL using httpx
|
||||
base_url = httpx.URL(base_target_url)
|
||||
updated_url = base_url.copy_with(path=encoded_endpoint)
|
||||
|
||||
# Add or update query parameters
|
||||
mistral_api_key = passthrough_endpoint_router.get_credentials(
|
||||
custom_llm_provider="mistral",
|
||||
region_name=None,
|
||||
)
|
||||
|
||||
## check for streaming
|
||||
is_streaming_request = False
|
||||
# anthropic is streaming when 'stream' = True is in the body
|
||||
if request.method == "POST":
|
||||
_request_body = await request.json()
|
||||
if _request_body.get("stream"):
|
||||
is_streaming_request = True
|
||||
|
||||
## CREATE PASS-THROUGH
|
||||
endpoint_func = create_pass_through_route(
|
||||
endpoint=endpoint,
|
||||
target=str(updated_url),
|
||||
custom_headers={"Authorization": "Bearer {}".format(mistral_api_key)},
|
||||
) # dynamically construct pass-through endpoint based on incoming path
|
||||
received_value = await endpoint_func(
|
||||
request,
|
||||
fastapi_response,
|
||||
user_api_key_dict,
|
||||
stream=is_streaming_request, # type: ignore
|
||||
)
|
||||
|
||||
return received_value
|
||||
|
||||
|
||||
@router.api_route(
|
||||
"/anthropic/{endpoint:path}",
|
||||
methods=["GET", "POST", "PUT", "DELETE", "PATCH"],
|
||||
|
|
|
@ -0,0 +1,219 @@
|
|||
import json
|
||||
from datetime import datetime
|
||||
from typing import TYPE_CHECKING, Any, List, Optional, Union
|
||||
|
||||
import httpx
|
||||
|
||||
import litellm
|
||||
from litellm._logging import verbose_proxy_logger
|
||||
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
|
||||
from litellm.litellm_core_utils.litellm_logging import (
|
||||
get_standard_logging_object_payload,
|
||||
)
|
||||
from litellm.llms.base_llm.chat.transformation import BaseConfig
|
||||
from litellm.proxy._types import PassThroughEndpointLoggingTypedDict
|
||||
from litellm.proxy.auth.auth_utils import get_end_user_id_from_request_body
|
||||
from litellm.proxy.pass_through_endpoints.types import PassthroughStandardLoggingPayload
|
||||
from litellm.types.utils import LlmProviders, ModelResponse, TextCompletionResponse
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from ..success_handler import PassThroughEndpointLogging
|
||||
from ..types import EndpointType
|
||||
else:
|
||||
PassThroughEndpointLogging = Any
|
||||
EndpointType = Any
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
|
||||
class BasePassthroughLoggingHandler(ABC):
|
||||
@property
|
||||
@abstractmethod
|
||||
def llm_provider_name(self) -> LlmProviders:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def get_provider_config(self, model: str) -> BaseConfig:
|
||||
pass
|
||||
|
||||
def passthrough_chat_handler(
|
||||
self,
|
||||
httpx_response: httpx.Response,
|
||||
response_body: dict,
|
||||
logging_obj: LiteLLMLoggingObj,
|
||||
url_route: str,
|
||||
result: str,
|
||||
start_time: datetime,
|
||||
end_time: datetime,
|
||||
cache_hit: bool,
|
||||
request_body: dict,
|
||||
**kwargs,
|
||||
) -> PassThroughEndpointLoggingTypedDict:
|
||||
"""
|
||||
Transforms LLM response to OpenAI response, generates a standard logging object so downstream logging can be handled
|
||||
"""
|
||||
model = request_body.get("model", response_body.get("model", ""))
|
||||
provider_config = self.get_provider_config(model=model)
|
||||
litellm_model_response: ModelResponse = provider_config.transform_response(
|
||||
raw_response=httpx_response,
|
||||
model_response=litellm.ModelResponse(),
|
||||
model=model,
|
||||
messages=[],
|
||||
logging_obj=logging_obj,
|
||||
optional_params={},
|
||||
api_key="",
|
||||
request_data={},
|
||||
encoding=litellm.encoding,
|
||||
json_mode=False,
|
||||
litellm_params={},
|
||||
)
|
||||
|
||||
kwargs = self._create_response_logging_payload(
|
||||
litellm_model_response=litellm_model_response,
|
||||
model=model,
|
||||
kwargs=kwargs,
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
logging_obj=logging_obj,
|
||||
)
|
||||
|
||||
return {
|
||||
"result": litellm_model_response,
|
||||
"kwargs": kwargs,
|
||||
}
|
||||
|
||||
def _get_user_from_metadata(
|
||||
self,
|
||||
passthrough_logging_payload: PassthroughStandardLoggingPayload,
|
||||
) -> Optional[str]:
|
||||
request_body = passthrough_logging_payload.get("request_body")
|
||||
if request_body:
|
||||
return get_end_user_id_from_request_body(request_body)
|
||||
return None
|
||||
|
||||
def _create_response_logging_payload(
|
||||
self,
|
||||
litellm_model_response: Union[ModelResponse, TextCompletionResponse],
|
||||
model: str,
|
||||
kwargs: dict,
|
||||
start_time: datetime,
|
||||
end_time: datetime,
|
||||
logging_obj: LiteLLMLoggingObj,
|
||||
) -> dict:
|
||||
"""
|
||||
Create the standard logging object for Generic LLM passthrough
|
||||
|
||||
handles streaming and non-streaming responses
|
||||
"""
|
||||
|
||||
try:
|
||||
response_cost = litellm.completion_cost(
|
||||
completion_response=litellm_model_response,
|
||||
model=model,
|
||||
)
|
||||
|
||||
kwargs["response_cost"] = response_cost
|
||||
kwargs["model"] = model
|
||||
passthrough_logging_payload: Optional[PassthroughStandardLoggingPayload] = ( # type: ignore
|
||||
kwargs.get("passthrough_logging_payload")
|
||||
)
|
||||
if passthrough_logging_payload:
|
||||
user = self._get_user_from_metadata(
|
||||
passthrough_logging_payload=passthrough_logging_payload,
|
||||
)
|
||||
if user:
|
||||
kwargs.setdefault("litellm_params", {})
|
||||
kwargs["litellm_params"].update(
|
||||
{"proxy_server_request": {"body": {"user": user}}}
|
||||
)
|
||||
|
||||
# Make standard logging object for Anthropic
|
||||
standard_logging_object = get_standard_logging_object_payload(
|
||||
kwargs=kwargs,
|
||||
init_response_obj=litellm_model_response,
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
logging_obj=logging_obj,
|
||||
status="success",
|
||||
)
|
||||
|
||||
# pretty print standard logging object
|
||||
verbose_proxy_logger.debug(
|
||||
"standard_logging_object= %s",
|
||||
json.dumps(standard_logging_object, indent=4),
|
||||
)
|
||||
kwargs["standard_logging_object"] = standard_logging_object
|
||||
|
||||
# set litellm_call_id to logging response object
|
||||
litellm_model_response.id = logging_obj.litellm_call_id
|
||||
litellm_model_response.model = model
|
||||
logging_obj.model_call_details["model"] = model
|
||||
return kwargs
|
||||
except Exception as e:
|
||||
verbose_proxy_logger.exception(
|
||||
"Error creating LLM passthrough response logging payload: %s", e
|
||||
)
|
||||
return kwargs
|
||||
|
||||
@abstractmethod
|
||||
def _build_complete_streaming_response(
|
||||
self,
|
||||
all_chunks: List[str],
|
||||
litellm_logging_obj: LiteLLMLoggingObj,
|
||||
model: str,
|
||||
) -> Optional[Union[ModelResponse, TextCompletionResponse]]:
|
||||
"""
|
||||
Builds complete response from raw chunks
|
||||
|
||||
- Converts str chunks to generic chunks
|
||||
- Converts generic chunks to litellm chunks (OpenAI format)
|
||||
- Builds complete response from litellm chunks
|
||||
"""
|
||||
pass
|
||||
|
||||
def _handle_logging_llm_collected_chunks(
|
||||
self,
|
||||
litellm_logging_obj: LiteLLMLoggingObj,
|
||||
passthrough_success_handler_obj: PassThroughEndpointLogging,
|
||||
url_route: str,
|
||||
request_body: dict,
|
||||
endpoint_type: EndpointType,
|
||||
start_time: datetime,
|
||||
all_chunks: List[str],
|
||||
end_time: datetime,
|
||||
) -> PassThroughEndpointLoggingTypedDict:
|
||||
"""
|
||||
Takes raw chunks from Anthropic passthrough endpoint and logs them in litellm callbacks
|
||||
|
||||
- Builds complete response from chunks
|
||||
- Creates standard logging object
|
||||
- Logs in litellm callbacks
|
||||
"""
|
||||
|
||||
model = request_body.get("model", "")
|
||||
complete_streaming_response = self._build_complete_streaming_response(
|
||||
all_chunks=all_chunks,
|
||||
litellm_logging_obj=litellm_logging_obj,
|
||||
model=model,
|
||||
)
|
||||
if complete_streaming_response is None:
|
||||
verbose_proxy_logger.error(
|
||||
"Unable to build complete streaming response for Anthropic passthrough endpoint, not logging..."
|
||||
)
|
||||
return {
|
||||
"result": None,
|
||||
"kwargs": {},
|
||||
}
|
||||
kwargs = self._create_response_logging_payload(
|
||||
litellm_model_response=complete_streaming_response,
|
||||
model=model,
|
||||
kwargs={},
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
logging_obj=litellm_logging_obj,
|
||||
)
|
||||
|
||||
return {
|
||||
"result": complete_streaming_response,
|
||||
"kwargs": kwargs,
|
||||
}
|
|
@@ -0,0 +1,56 @@
from typing import List, Optional, Union

from litellm import stream_chunk_builder
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper
from litellm.llms.base_llm.chat.transformation import BaseConfig
from litellm.llms.cohere.chat.v2_transformation import CohereV2ChatConfig
from litellm.llms.cohere.common_utils import (
    ModelResponseIterator as CohereModelResponseIterator,
)
from litellm.types.utils import LlmProviders, ModelResponse, TextCompletionResponse

from .base_passthrough_logging_handler import BasePassthroughLoggingHandler


class CoherePassthroughLoggingHandler(BasePassthroughLoggingHandler):
    @property
    def llm_provider_name(self) -> LlmProviders:
        return LlmProviders.COHERE

    def get_provider_config(self, model: str) -> BaseConfig:
        return CohereV2ChatConfig()

    def _build_complete_streaming_response(
        self,
        all_chunks: List[str],
        litellm_logging_obj: LiteLLMLoggingObj,
        model: str,
    ) -> Optional[Union[ModelResponse, TextCompletionResponse]]:
        cohere_model_response_iterator = CohereModelResponseIterator(
            streaming_response=None,
            sync_stream=False,
        )
        litellm_custom_stream_wrapper = CustomStreamWrapper(
            completion_stream=cohere_model_response_iterator,
            model=model,
            logging_obj=litellm_logging_obj,
            custom_llm_provider="cohere",
        )
        all_openai_chunks = []
        for _chunk_str in all_chunks:
            try:
                generic_chunk = (
                    cohere_model_response_iterator.convert_str_chunk_to_generic_chunk(
                        chunk=_chunk_str
                    )
                )
                litellm_chunk = litellm_custom_stream_wrapper.chunk_creator(
                    chunk=generic_chunk
                )
                if litellm_chunk is not None:
                    all_openai_chunks.append(litellm_chunk)
            except (StopIteration, StopAsyncIteration):
                break
        complete_streaming_response = stream_chunk_builder(chunks=all_openai_chunks)
        return complete_streaming_response
@@ -683,6 +683,7 @@ async def pass_through_request(  # noqa: PLR0915
                    end_time=end_time,
                    logging_obj=logging_obj,
                    cache_hit=False,
                    request_body=_parsed_body,
                    **kwargs,
                )
            )
@@ -16,10 +16,15 @@ from .llm_provider_handlers.anthropic_passthrough_logging_handler import (
from .llm_provider_handlers.assembly_passthrough_logging_handler import (
    AssemblyAIPassthroughLoggingHandler,
)
from .llm_provider_handlers.cohere_passthrough_logging_handler import (
    CoherePassthroughLoggingHandler,
)
from .llm_provider_handlers.vertex_passthrough_logging_handler import (
    VertexPassthroughLoggingHandler,
)

cohere_passthrough_logging_handler = CoherePassthroughLoggingHandler()


class PassThroughEndpointLogging:
    def __init__(self):
@@ -32,6 +37,8 @@ class PassThroughEndpointLogging:
        # Anthropic
        self.TRACKED_ANTHROPIC_ROUTES = ["/messages"]

        # Cohere
        self.TRACKED_COHERE_ROUTES = ["/v2/chat"]
        self.assemblyai_passthrough_logging_handler = (
            AssemblyAIPassthroughLoggingHandler()
        )
@@ -84,6 +91,7 @@ class PassThroughEndpointLogging:
        start_time: datetime,
        end_time: datetime,
        cache_hit: bool,
        request_body: dict,
        **kwargs,
    ):
        standard_logging_response_object: Optional[
@@ -125,6 +133,25 @@ class PassThroughEndpointLogging:
                anthropic_passthrough_logging_handler_result["result"]
            )
            kwargs = anthropic_passthrough_logging_handler_result["kwargs"]
        elif self.is_cohere_route(url_route):
            cohere_passthrough_logging_handler_result = (
                cohere_passthrough_logging_handler.passthrough_chat_handler(
                    httpx_response=httpx_response,
                    response_body=response_body or {},
                    logging_obj=logging_obj,
                    url_route=url_route,
                    result=result,
                    start_time=start_time,
                    end_time=end_time,
                    cache_hit=cache_hit,
                    request_body=request_body,
                    **kwargs,
                )
            )
            standard_logging_response_object = (
                cohere_passthrough_logging_handler_result["result"]
            )
            kwargs = cohere_passthrough_logging_handler_result["kwargs"]
        elif self.is_assemblyai_route(url_route):
            if (
                AssemblyAIPassthroughLoggingHandler._should_log_request(
@@ -173,6 +200,11 @@ class PassThroughEndpointLogging:
                return True
        return False

    def is_cohere_route(self, url_route: str):
        for route in self.TRACKED_COHERE_ROUTES:
            if route in url_route:
                return True

    def is_assemblyai_route(self, url_route: str):
        parsed_url = urlparse(url_route)
        if parsed_url.hostname == "api.assemblyai.com":
@@ -803,9 +803,9 @@ model_max_budget_limiter = _PROXY_VirtualKeyModelMaxBudgetLimiter(
    dual_cache=user_api_key_cache
)
litellm.logging_callback_manager.add_litellm_callback(model_max_budget_limiter)
redis_usage_cache: Optional[RedisCache] = (
    None  # redis cache used for tracking spend, tpm/rpm limits
)
redis_usage_cache: Optional[
    RedisCache
] = None  # redis cache used for tracking spend, tpm/rpm limits
user_custom_auth = None
user_custom_key_generate = None
user_custom_sso = None
@@ -1131,9 +1131,9 @@ async def update_cache(  # noqa: PLR0915
        _id = "team_id:{}".format(team_id)
        try:
            # Fetch the existing cost for the given user
            existing_spend_obj: Optional[LiteLLM_TeamTable] = (
                await user_api_key_cache.async_get_cache(key=_id)
            )
            existing_spend_obj: Optional[
                LiteLLM_TeamTable
            ] = await user_api_key_cache.async_get_cache(key=_id)
            if existing_spend_obj is None:
                # do nothing if team not in api key cache
                return
@@ -2812,9 +2812,9 @@ async def initialize(  # noqa: PLR0915
        user_api_base = api_base
        dynamic_config[user_model]["api_base"] = api_base
    if api_version:
        os.environ["AZURE_API_VERSION"] = (
            api_version  # set this for azure - litellm can read this from the env
        )
        os.environ[
            "AZURE_API_VERSION"
        ] = api_version  # set this for azure - litellm can read this from the env
    if max_tokens:  # model-specific param
        dynamic_config[user_model]["max_tokens"] = max_tokens
    if temperature:  # model-specific param
@@ -3316,6 +3316,7 @@ async def model_list(
        user_model=user_model,
        infer_model_from_keys=general_settings.get("infer_model_from_keys", False),
        return_wildcard_routes=return_wildcard_routes,
        llm_router=llm_router,
    )

    return dict(
@@ -7758,9 +7759,9 @@ async def get_config_list(
                        hasattr(sub_field_info, "description")
                        and sub_field_info.description is not None
                    ):
                        nested_fields[idx].field_description = (
                            sub_field_info.description
                        )
                        nested_fields[
                            idx
                        ].field_description = sub_field_info.description
                    idx += 1

                _stored_in_db = None
17
litellm/types/integrations/anthropic_cache_control_hook.py
Normal file
17
litellm/types/integrations/anthropic_cache_control_hook.py
Normal file
|
@@ -0,0 +1,17 @@
from typing import Literal, Optional, TypedDict, Union

from litellm.types.llms.openai import ChatCompletionCachedContent


class CacheControlMessageInjectionPoint(TypedDict):
    """Type for message-level injection points."""

    location: Literal["message"]
    role: Optional[
        Literal["user", "system", "assistant"]
    ]  # Optional: target by role (user, system, assistant)
    index: Optional[Union[int, str]]  # Optional: target by specific index
    control: Optional[ChatCompletionCachedContent]


CacheControlInjectionPoint = CacheControlMessageInjectionPoint
@@ -44,3 +44,59 @@ class ChatHistoryChatBot(TypedDict, total=False):
ChatHistory = List[
    Union[ChatHistorySystem, ChatHistoryChatBot, ChatHistoryUser, ChatHistoryToolResult]
]


class CohereV2ChatResponseMessageToolCallFunction(TypedDict, total=False):
    name: str
    parameters: dict


class CohereV2ChatResponseMessageToolCall(TypedDict):
    id: str
    type: Literal["function"]
    function: CohereV2ChatResponseMessageToolCallFunction


class CohereV2ChatResponseMessageContent(TypedDict):
    id: str
    type: Literal["tool"]
    tool: str


class CohereV2ChatResponseMessage(TypedDict, total=False):
    role: Required[Literal["assistant"]]
    tool_calls: List[CohereV2ChatResponseMessageToolCall]
    tool_plan: str
    content: List[CohereV2ChatResponseMessageContent]
    citations: List[dict]


class CohereV2ChatResponseUsageBilledUnits(TypedDict, total=False):
    input_tokens: int
    output_tokens: int
    search_units: int
    classifications: int


class CohereV2ChatResponseUsageTokens(TypedDict, total=False):
    input_tokens: int
    output_tokens: int


class CohereV2ChatResponseUsage(TypedDict, total=False):
    billed_units: CohereV2ChatResponseUsageBilledUnits
    tokens: CohereV2ChatResponseUsageTokens


class CohereV2ChatResponseLogProbs(TypedDict, total=False):
    token_ids: Required[List[int]]
    text: str
    logprobs: List[float]


class CohereV2ChatResponse(TypedDict):
    id: str
    finish_reason: str
    message: CohereV2ChatResponseMessage
    usage: CohereV2ChatResponseUsage
    logprobs: CohereV2ChatResponseLogProbs
235
litellm/utils.py
235
litellm/utils.py
|
@@ -516,9 +516,9 @@ def function_setup(  # noqa: PLR0915
    function_id: Optional[str] = kwargs["id"] if "id" in kwargs else None

    ## DYNAMIC CALLBACKS ##
    dynamic_callbacks: Optional[List[Union[str, Callable, CustomLogger]]] = (
        kwargs.pop("callbacks", None)
    )
    dynamic_callbacks: Optional[
        List[Union[str, Callable, CustomLogger]]
    ] = kwargs.pop("callbacks", None)
    all_callbacks = get_dynamic_callbacks(dynamic_callbacks=dynamic_callbacks)

    if len(all_callbacks) > 0:
@@ -1202,9 +1202,9 @@ def client(original_function):  # noqa: PLR0915
                        exception=e,
                        retry_policy=kwargs.get("retry_policy"),
                    )
                    kwargs["retry_policy"] = (
                        reset_retry_policy()
                    )  # prevent infinite loops
                    kwargs[
                        "retry_policy"
                    ] = reset_retry_policy()  # prevent infinite loops
                    litellm.num_retries = (
                        None  # set retries to None to prevent infinite loops
                    )
@@ -3013,16 +3013,16 @@ def get_optional_params(  # noqa: PLR0915
            True  # so that main.py adds the function call to the prompt
        )
        if "tools" in non_default_params:
            optional_params["functions_unsupported_model"] = (
                non_default_params.pop("tools")
            )
            optional_params[
                "functions_unsupported_model"
            ] = non_default_params.pop("tools")
            non_default_params.pop(
                "tool_choice", None
            )  # causes ollama requests to hang
        elif "functions" in non_default_params:
            optional_params["functions_unsupported_model"] = (
                non_default_params.pop("functions")
            )
            optional_params[
                "functions_unsupported_model"
            ] = non_default_params.pop("functions")
        elif (
            litellm.add_function_to_prompt
        ):  # if user opts to add it to prompt instead
@@ -3045,10 +3045,10 @@ def get_optional_params(  # noqa: PLR0915

    if "response_format" in non_default_params:
        if provider_config is not None:
            non_default_params["response_format"] = (
                provider_config.get_json_schema_from_pydantic_object(
                    response_format=non_default_params["response_format"]
                )
            non_default_params[
                "response_format"
            ] = provider_config.get_json_schema_from_pydantic_object(
                response_format=non_default_params["response_format"]
            )
        else:
            non_default_params["response_format"] = type_to_response_format_param(
@@ -4064,9 +4064,9 @@ def _count_characters(text: str) -> int:


def get_response_string(response_obj: Union[ModelResponse, ModelResponseStream]) -> str:
    _choices: Union[List[Union[Choices, StreamingChoices]], List[StreamingChoices]] = (
        response_obj.choices
    )
    _choices: Union[
        List[Union[Choices, StreamingChoices]], List[StreamingChoices]
    ] = response_obj.choices

    response_str = ""
    for choice in _choices:
@@ -4458,14 +4458,14 @@ def _get_model_info_helper(  # noqa: PLR0915

        if combined_model_name in litellm.model_cost:
            key = combined_model_name
            _model_info = _get_model_info_from_model_cost(key=key)
            _model_info = _get_model_info_from_model_cost(key=cast(str, key))
            if not _check_provider_match(
                model_info=_model_info, custom_llm_provider=custom_llm_provider
            ):
                _model_info = None
        if _model_info is None and model in litellm.model_cost:
            key = model
            _model_info = _get_model_info_from_model_cost(key=key)
            _model_info = _get_model_info_from_model_cost(key=cast(str, key))
            if not _check_provider_match(
                model_info=_model_info, custom_llm_provider=custom_llm_provider
            ):
@@ -4475,21 +4475,21 @@ def _get_model_info_helper(  # noqa: PLR0915
            and combined_stripped_model_name in litellm.model_cost
        ):
            key = combined_stripped_model_name
            _model_info = _get_model_info_from_model_cost(key=key)
            _model_info = _get_model_info_from_model_cost(key=cast(str, key))
            if not _check_provider_match(
                model_info=_model_info, custom_llm_provider=custom_llm_provider
            ):
                _model_info = None
        if _model_info is None and stripped_model_name in litellm.model_cost:
            key = stripped_model_name
            _model_info = _get_model_info_from_model_cost(key=key)
            _model_info = _get_model_info_from_model_cost(key=cast(str, key))
            if not _check_provider_match(
                model_info=_model_info, custom_llm_provider=custom_llm_provider
            ):
                _model_info = None
        if _model_info is None and split_model in litellm.model_cost:
            key = split_model
            _model_info = _get_model_info_from_model_cost(key=key)
            _model_info = _get_model_info_from_model_cost(key=cast(str, key))
            if not _check_provider_match(
                model_info=_model_info, custom_llm_provider=custom_llm_provider
            ):
@ -5807,8 +5807,133 @@ def trim_messages(
|
|||
return messages
|
||||
|
||||
|
||||
from litellm.caching.in_memory_cache import InMemoryCache
|
||||
|
||||
|
||||
class AvailableModelsCache(InMemoryCache):
|
||||
def __init__(self, ttl_seconds: int = 300, max_size: int = 1000):
|
||||
super().__init__(ttl_seconds, max_size)
|
||||
self._env_hash: Optional[str] = None
|
||||
|
||||
def _get_env_hash(self) -> str:
|
||||
"""Create a hash of relevant environment variables"""
|
||||
env_vars = {
|
||||
k: v
|
||||
for k, v in os.environ.items()
|
||||
if k.startswith(("OPENAI", "ANTHROPIC", "AZURE", "AWS"))
|
||||
}
|
||||
return str(hash(frozenset(env_vars.items())))
|
||||
|
||||
def _check_env_changed(self) -> bool:
|
||||
"""Check if environment variables have changed"""
|
||||
current_hash = self._get_env_hash()
|
||||
if self._env_hash is None:
|
||||
self._env_hash = current_hash
|
||||
return True
|
||||
return current_hash != self._env_hash
|
||||
|
||||
def _get_cache_key(
|
||||
self,
|
||||
custom_llm_provider: Optional[str],
|
||||
litellm_params: Optional[LiteLLM_Params],
|
||||
) -> str:
|
||||
valid_str = ""
|
||||
|
||||
if litellm_params is not None:
|
||||
valid_str = litellm_params.model_dump_json()
|
||||
if custom_llm_provider is not None:
|
||||
valid_str = f"{custom_llm_provider}:{valid_str}"
|
||||
return hashlib.sha256(valid_str.encode()).hexdigest()
|
||||
|
||||
def get_cached_model_info(
|
||||
self,
|
||||
custom_llm_provider: Optional[str] = None,
|
||||
litellm_params: Optional[LiteLLM_Params] = None,
|
||||
) -> Optional[List[str]]:
|
||||
"""Get cached model info"""
|
||||
# Check if environment has changed
|
||||
if litellm_params is None and self._check_env_changed():
|
||||
self.cache_dict.clear()
|
||||
return None
|
||||
|
||||
cache_key = self._get_cache_key(custom_llm_provider, litellm_params)
|
||||
|
||||
result = cast(Optional[List[str]], self.get_cache(cache_key))
|
||||
|
||||
if result is not None:
|
||||
return copy.deepcopy(result)
|
||||
return result
|
||||
|
||||
def set_cached_model_info(
|
||||
self,
|
||||
custom_llm_provider: str,
|
||||
litellm_params: Optional[LiteLLM_Params],
|
||||
available_models: List[str],
|
||||
):
|
||||
"""Set cached model info"""
|
||||
cache_key = self._get_cache_key(custom_llm_provider, litellm_params)
|
||||
self.set_cache(cache_key, copy.deepcopy(available_models))
|
||||
|
||||
|
||||
# Global cache instance
|
||||
_model_cache = AvailableModelsCache()
|
||||
|
||||
|
||||
def _infer_valid_provider_from_env_vars(
|
||||
custom_llm_provider: Optional[str] = None,
|
||||
) -> List[str]:
|
||||
valid_providers: List[str] = []
|
||||
environ_keys = os.environ.keys()
|
||||
for provider in litellm.provider_list:
|
||||
if custom_llm_provider and provider != custom_llm_provider:
|
||||
continue
|
||||
|
||||
# edge case litellm has together_ai as a provider, it should be togetherai
|
||||
env_provider_1 = provider.replace("_", "")
|
||||
env_provider_2 = provider
|
||||
|
||||
# litellm standardizes expected provider keys to
|
||||
# PROVIDER_API_KEY. Example: OPENAI_API_KEY, COHERE_API_KEY
|
||||
expected_provider_key_1 = f"{env_provider_1.upper()}_API_KEY"
|
||||
expected_provider_key_2 = f"{env_provider_2.upper()}_API_KEY"
|
||||
if (
|
||||
expected_provider_key_1 in environ_keys
|
||||
or expected_provider_key_2 in environ_keys
|
||||
):
|
||||
# key is set
|
||||
valid_providers.append(provider)
|
||||
|
||||
return valid_providers
|
||||
|
||||
|
||||
def _get_valid_models_from_provider_api(
|
||||
provider_config: BaseLLMModelInfo,
|
||||
custom_llm_provider: str,
|
||||
litellm_params: Optional[LiteLLM_Params] = None,
|
||||
) -> List[str]:
|
||||
try:
|
||||
cached_result = _model_cache.get_cached_model_info(
|
||||
custom_llm_provider, litellm_params
|
||||
)
|
||||
|
||||
if cached_result is not None:
|
||||
return cached_result
|
||||
models = provider_config.get_models(
|
||||
api_key=litellm_params.api_key if litellm_params is not None else None,
|
||||
api_base=litellm_params.api_base if litellm_params is not None else None,
|
||||
)
|
||||
|
||||
_model_cache.set_cached_model_info(custom_llm_provider, litellm_params, models)
|
||||
return models
|
||||
except Exception as e:
|
||||
verbose_logger.debug(f"Error getting valid models: {e}")
|
||||
return []
|
||||
|
||||
|
||||
def get_valid_models(
    check_provider_endpoint: bool = False, custom_llm_provider: Optional[str] = None
    check_provider_endpoint: Optional[bool] = None,
    custom_llm_provider: Optional[str] = None,
    litellm_params: Optional[LiteLLM_Params] = None,
) -> List[str]:
    """
    Returns a list of valid LLMs based on the set environment variables
@@ -5819,31 +5944,21 @@
    Returns:
        A list of valid LLMs
    """

    try:
        check_provider_endpoint = (
            check_provider_endpoint or litellm.check_provider_endpoint
        )
        # get keys set in .env
        environ_keys = os.environ.keys()
        valid_providers = []

        valid_providers: List[str] = []
        valid_models: List[str] = []
        # for all valid providers, make a list of supported llms
        valid_models = []

        for provider in litellm.provider_list:
            if custom_llm_provider and provider != custom_llm_provider:
                continue

            # edge case litellm has together_ai as a provider, it should be togetherai
            env_provider_1 = provider.replace("_", "")
            env_provider_2 = provider

            # litellm standardizes expected provider keys to
            # PROVIDER_API_KEY. Example: OPENAI_API_KEY, COHERE_API_KEY
            expected_provider_key_1 = f"{env_provider_1.upper()}_API_KEY"
            expected_provider_key_2 = f"{env_provider_2.upper()}_API_KEY"
            if (
                expected_provider_key_1 in environ_keys
                or expected_provider_key_2 in environ_keys
            ):
                # key is set
                valid_providers.append(provider)
        if custom_llm_provider:
            valid_providers = [custom_llm_provider]
        else:
            valid_providers = _infer_valid_provider_from_env_vars(custom_llm_provider)

        for provider in valid_providers:
            provider_config = ProviderConfigManager.get_provider_model_info(
@@ -5856,15 +5971,24 @@ def get_valid_models(

            if provider == "azure":
                valid_models.append("Azure-LLM")
            elif provider_config is not None and check_provider_endpoint:
                try:
                    models = provider_config.get_models()
                    valid_models.extend(models)
                except Exception as e:
                    verbose_logger.debug(f"Error getting valid models: {e}")
            elif (
                provider_config is not None
                and check_provider_endpoint
                and provider is not None
            ):
                valid_models.extend(
                    _get_valid_models_from_provider_api(
                        provider_config,
                        provider,
                        litellm_params,
                    )
                )
            else:
                models_for_provider = litellm.models_by_provider.get(provider, [])
                models_for_provider = copy.deepcopy(
                    litellm.models_by_provider.get(provider, [])
                )
                valid_models.extend(models_for_provider)

        return valid_models
    except Exception as e:
        verbose_logger.debug(f"Error getting valid models: {e}")
@@ -6510,7 +6634,12 @@ class ProviderConfigManager:
            return litellm.AnthropicModelInfo()
        elif LlmProviders.XAI == provider:
            return litellm.XAIModelInfo()
        elif LlmProviders.VLLM == provider:
            from litellm.llms.vllm.common_utils import (
                VLLMModelInfo,  # experimental approach, to reduce bloat on __init__.py
            )

            return VLLMModelInfo()
        return None

    @staticmethod
@@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
version = "1.66.0"
version = "1.66.1"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT"
@@ -118,7 +118,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api"

[tool.commitizen]
version = "1.66.0"
version = "1.66.1"
version_files = [
    "pyproject.toml:^version"
]
169
tests/litellm/integrations/test_anthropic_cache_control_hook.py
Normal file
169
tests/litellm/integrations/test_anthropic_cache_control_hook.py
Normal file
|
@ -0,0 +1,169 @@
|
|||
import datetime
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
from typing import List, Optional, Tuple
|
||||
from unittest.mock import ANY, MagicMock, Mock, patch
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
|
||||
sys.path.insert(
|
||||
0, os.path.abspath("../..")
|
||||
) # Adds the parent directory to the system-path
|
||||
import litellm
|
||||
from litellm.integrations.anthropic_cache_control_hook import AnthropicCacheControlHook
|
||||
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
|
||||
from litellm.types.llms.openai import AllMessageValues
|
||||
from litellm.types.utils import StandardCallbackDynamicParams
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_anthropic_cache_control_hook_system_message():
|
||||
# Use patch.dict to mock environment variables instead of setting them directly
|
||||
with patch.dict(
|
||||
os.environ,
|
||||
{
|
||||
"AWS_ACCESS_KEY_ID": "fake_access_key_id",
|
||||
"AWS_SECRET_ACCESS_KEY": "fake_secret_access_key",
|
||||
"AWS_REGION_NAME": "us-west-2",
|
||||
},
|
||||
):
|
||||
anthropic_cache_control_hook = AnthropicCacheControlHook()
|
||||
litellm.callbacks = [anthropic_cache_control_hook]
|
||||
|
||||
# Mock response data
|
||||
mock_response = MagicMock()
|
||||
mock_response.json.return_value = {
|
||||
"output": {
|
||||
"message": {
|
||||
"role": "assistant",
|
||||
"content": "Here is my analysis of the key terms and conditions...",
|
||||
}
|
||||
},
|
||||
"stopReason": "stop_sequence",
|
||||
"usage": {
|
||||
"inputTokens": 100,
|
||||
"outputTokens": 200,
|
||||
"totalTokens": 300,
|
||||
"cacheReadInputTokens": 100,
|
||||
"cacheWriteInputTokens": 200,
|
||||
},
|
||||
}
|
||||
mock_response.status_code = 200
|
||||
|
||||
# Mock AsyncHTTPHandler.post method
|
||||
client = AsyncHTTPHandler()
|
||||
with patch.object(client, "post", return_value=mock_response) as mock_post:
|
||||
response = await litellm.acompletion(
|
||||
model="bedrock/anthropic.claude-3-5-haiku-20241022-v1:0",
|
||||
messages=[
|
||||
{
|
||||
"role": "system",
|
||||
"content": [
|
||||
{
|
||||
"type": "text",
|
||||
"text": "You are an AI assistant tasked with analyzing legal documents.",
|
||||
},
|
||||
{
|
||||
"type": "text",
|
||||
"text": "Here is the full text of a complex legal agreement",
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": "what are the key terms and conditions in this agreement?",
|
||||
},
|
||||
],
|
||||
cache_control_injection_points=[
|
||||
{
|
||||
"location": "message",
|
||||
"role": "system",
|
||||
},
|
||||
],
|
||||
client=client,
|
||||
)
|
||||
|
||||
mock_post.assert_called_once()
|
||||
request_body = json.loads(mock_post.call_args.kwargs["data"])
|
||||
|
||||
print("request_body: ", json.dumps(request_body, indent=4))
|
||||
|
||||
# Verify the request body
|
||||
assert request_body["system"][1]["cachePoint"] == {"type": "default"}
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_anthropic_cache_control_hook_user_message():
|
||||
# Use patch.dict to mock environment variables instead of setting them directly
|
||||
with patch.dict(
|
||||
os.environ,
|
||||
{
|
||||
"AWS_ACCESS_KEY_ID": "fake_access_key_id",
|
||||
"AWS_SECRET_ACCESS_KEY": "fake_secret_access_key",
|
||||
"AWS_REGION_NAME": "us-west-2",
|
||||
},
|
||||
):
|
||||
anthropic_cache_control_hook = AnthropicCacheControlHook()
|
||||
litellm.callbacks = [anthropic_cache_control_hook]
|
||||
|
||||
# Mock response data
|
||||
mock_response = MagicMock()
|
||||
mock_response.json.return_value = {
|
||||
"output": {
|
||||
"message": {
|
||||
"role": "assistant",
|
||||
"content": "Here is my analysis of the key terms and conditions...",
|
||||
}
|
||||
},
|
||||
"stopReason": "stop_sequence",
|
||||
"usage": {
|
||||
"inputTokens": 100,
|
||||
"outputTokens": 200,
|
||||
"totalTokens": 300,
|
||||
"cacheReadInputTokens": 100,
|
||||
"cacheWriteInputTokens": 200,
|
||||
},
|
||||
}
|
||||
mock_response.status_code = 200
|
||||
|
||||
# Mock AsyncHTTPHandler.post method
|
||||
client = AsyncHTTPHandler()
|
||||
with patch.object(client, "post", return_value=mock_response) as mock_post:
|
||||
response = await litellm.acompletion(
|
||||
model="bedrock/anthropic.claude-3-5-haiku-20241022-v1:0",
|
||||
messages=[
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{
|
||||
"type": "text",
|
||||
"text": "You are an AI assistant tasked with analyzing legal documents.",
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": "what are the key terms and conditions in this agreement? <very_long_text>",
|
||||
},
|
||||
],
|
||||
cache_control_injection_points=[
|
||||
{
|
||||
"location": "message",
|
||||
"role": "user",
|
||||
},
|
||||
],
|
||||
client=client,
|
||||
)
|
||||
|
||||
mock_post.assert_called_once()
|
||||
request_body = json.loads(mock_post.call_args.kwargs["data"])
|
||||
|
||||
print("request_body: ", json.dumps(request_body, indent=4))
|
||||
|
||||
# Verify the request body
|
||||
assert request_body["messages"][1]["content"][1]["cachePoint"] == {
|
||||
"type": "default"
|
||||
}
|
|
@@ -30,7 +30,7 @@ class TestCustomPromptManagement(CustomPromptManagement):
        model: str,
        messages: List[AllMessageValues],
        non_default_params: dict,
        prompt_id: str,
        prompt_id: Optional[str],
        prompt_variables: Optional[dict],
        dynamic_callback_params: StandardCallbackDynamicParams,
    ) -> Tuple[str, List[AllMessageValues], dict]:
@@ -78,6 +78,33 @@ def test_initialize_with_api_key(setup_mocks):
    assert result["azure_ad_token"] is None


def test_initialize_with_tenant_credentials_env_var(setup_mocks, monkeypatch):
    monkeypatch.setenv("AZURE_TENANT_ID", "test-tenant-id")
    monkeypatch.setenv("AZURE_CLIENT_ID", "test-client-id")
    monkeypatch.setenv("AZURE_CLIENT_SECRET", "test-client-secret")

    result = BaseAzureLLM().initialize_azure_sdk_client(
        litellm_params={},
        api_key=None,
        api_base="https://test.openai.azure.com",
        model_name="gpt-4",
        api_version=None,
        is_async=False,
    )

    # Verify that get_azure_ad_token_from_entrata_id was called
    setup_mocks["entrata_token"].assert_called_once_with(
        tenant_id="test-tenant-id",
        client_id="test-client-id",
        client_secret="test-client-secret",
    )

    # Verify expected result
    assert result["api_key"] is None
    assert result["azure_endpoint"] == "https://test.openai.azure.com"
    assert "azure_ad_token_provider" in result


def test_initialize_with_tenant_credentials(setup_mocks):
    # Test with tenant_id, client_id, and client_secret provided
    result = BaseAzureLLM().initialize_azure_sdk_client(
@@ -150,8 +177,12 @@ def test_initialize_with_oidc_token(setup_mocks):
    assert result["azure_ad_token"] == "mock-oidc-token"


def test_initialize_with_enable_token_refresh(setup_mocks):
def test_initialize_with_enable_token_refresh(setup_mocks, monkeypatch):
    litellm._turn_on_debug()
    # Enable token refresh
    monkeypatch.delenv("AZURE_CLIENT_ID", raising=False)
    monkeypatch.delenv("AZURE_CLIENT_SECRET", raising=False)
    monkeypatch.delenv("AZURE_TENANT_ID", raising=False)
    setup_mocks["litellm"].enable_azure_ad_token_refresh = True

    # Test with token refresh enabled
@@ -171,8 +202,11 @@ def test_initialize_with_enable_token_refresh(setup_mocks):
    assert "azure_ad_token_provider" in result


def test_initialize_with_token_refresh_error(setup_mocks):
def test_initialize_with_token_refresh_error(setup_mocks, monkeypatch):
    # Enable token refresh but make it raise an error
    monkeypatch.delenv("AZURE_CLIENT_ID", raising=False)
    monkeypatch.delenv("AZURE_CLIENT_SECRET", raising=False)
    monkeypatch.delenv("AZURE_TENANT_ID", raising=False)
    setup_mocks["litellm"].enable_azure_ad_token_refresh = True
    setup_mocks["token_provider"].side_effect = ValueError("Token provider error")
@@ -41,8 +41,10 @@ from unittest.mock import AsyncMock, MagicMock, patch


# Assuming your trim_messages, shorten_message_to_fit_limit, and get_token_count functions are all in a module named 'message_utils'


@pytest.fixture(autouse=True)
def reset_mock_cache():
    from litellm.utils import _model_cache
    _model_cache.flush_cache()
# Test 1: Check trimming of normal message
def test_basic_trimming():
    messages = [
@@ -1539,6 +1541,7 @@ def test_get_valid_models_fireworks_ai(monkeypatch):
        litellm.module_level_client, "get", return_value=mock_response
    ) as mock_post:
        valid_models = get_valid_models(check_provider_endpoint=True)
        print("valid_models", valid_models)
        mock_post.assert_called_once()
        assert (
            "fireworks_ai/accounts/fireworks/models/llama-3.1-8b-instruct"
@@ -2122,3 +2125,58 @@ def test_get_provider_audio_transcription_config():
        config = ProviderConfigManager.get_provider_audio_transcription_config(
            model="whisper-1", provider=provider
        )


def test_get_valid_models_from_provider():
    """
    Test that get_valid_models returns the correct models for a given provider
    """
    from litellm.utils import get_valid_models

    valid_models = get_valid_models(custom_llm_provider="openai")
    assert len(valid_models) > 0
    assert "gpt-4o-mini" in valid_models

    print("Valid models: ", valid_models)
    valid_models.remove("gpt-4o-mini")
    assert "gpt-4o-mini" not in valid_models

    valid_models = get_valid_models(custom_llm_provider="openai")
    assert len(valid_models) > 0
    assert "gpt-4o-mini" in valid_models



def test_get_valid_models_from_provider_cache_invalidation(monkeypatch):
    """
    Test that get_valid_models returns the correct models for a given provider
    """
    from litellm.utils import _model_cache

    monkeypatch.setenv("OPENAI_API_KEY", "123")

    _model_cache.set_cached_model_info("openai", litellm_params=None, available_models=["gpt-4o-mini"])
    monkeypatch.delenv("OPENAI_API_KEY")

    assert _model_cache.get_cached_model_info("openai") is None



def test_get_valid_models_from_dynamic_api_key():
    """
    Test that get_valid_models returns the correct models for a given provider
    """
    from litellm.utils import get_valid_models
    from litellm.types.router import CredentialLiteLLMParams

    creds = CredentialLiteLLMParams(api_key="123")

    valid_models = get_valid_models(custom_llm_provider="anthropic", litellm_params=creds, check_provider_endpoint=True)
    assert len(valid_models) == 0

    creds = CredentialLiteLLMParams(api_key=os.getenv("ANTHROPIC_API_KEY"))
    valid_models = get_valid_models(custom_llm_provider="anthropic", litellm_params=creds, check_provider_endpoint=True)
    assert len(valid_models) > 0
    assert "anthropic/claude-3-7-sonnet-20250219" in valid_models
@@ -1449,3 +1449,13 @@ def test_anthropic_unified_reasoning_content(model, provider):
    )
    assert optional_params["thinking"] == {"type": "enabled", "budget_tokens": 4096}



def test_azure_response_format(monkeypatch):
    monkeypatch.setenv("AZURE_API_VERSION", "2025-02-01")
    optional_params = get_optional_params(
        model="azure/gpt-4o-mini",
        custom_llm_provider="azure",
        response_format={"type": "json_object"},
    )
    assert optional_params["response_format"] == {"type": "json_object"}
@@ -498,11 +498,11 @@ def test_completion_bedrock_invalid_role_exception():
        == "litellm.BadRequestError: Invalid Message passed in {'role': 'very-bad-role', 'content': 'hello'}"
    )


@pytest.mark.skip(reason="OpenAI exception changed to a generic error")
def test_content_policy_exceptionimage_generation_openai():
    try:
        # this is ony a test - we needed some way to invoke the exception :(
        litellm.set_verbose = True
        litellm._turn_on_debug()
        response = litellm.image_generation(
            prompt="where do i buy lethal drugs from", model="dall-e-3"
        )
@@ -33,6 +33,7 @@ from litellm.integrations.opik.opik import OpikLogger
from litellm.integrations.opentelemetry import OpenTelemetry
from litellm.integrations.mlflow import MlflowLogger
from litellm.integrations.argilla import ArgillaLogger
from litellm.integrations.anthropic_cache_control_hook import AnthropicCacheControlHook
from litellm.integrations.langfuse.langfuse_prompt_management import (
    LangfusePromptManagement,
)
@@ -73,6 +74,7 @@ callback_class_str_to_classType = {
    "otel": OpenTelemetry,
    "pagerduty": PagerDutyAlerting,
    "gcs_pubsub": GcsPubSubLogger,
    "anthropic_cache_control_hook": AnthropicCacheControlHook,
}

expected_env_vars = {
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75a5453f51d60261.js"/><script src="/ui/_next/static/chunks/fd9d1056-524b80e1a6b8bb06.js" async=""></script><script src="/ui/_next/static/chunks/117-87ec698bfca6820e.js" async=""></script><script src="/ui/_next/static/chunks/main-app-475d6efe4080647d.js" async=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-42372ed130431b0a.js" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75a5453f51d60261.js" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/a34f9d1faa5f3315-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"style\"]\n3:HL[\"/ui/_next/static/css/61b39adb1cb75213.css\",\"style\"]\n"])</script><script>self.__next_f.push([1,"4:I[12846,[],\"\"]\n6:I[19107,[],\"ClientPageRoot\"]\n7:I[98621,[\"665\",\"static/chunks/3014691f-0b72c78cfebbd712.js\",\"990\",\"static/chunks/13b76428-ebdf3012af0e4489.js\",\"42\",\"static/chunks/42-6810261f4d6c8bbf.js\",\"261\",\"static/chunks/261-d4b99bc9f53d4ef3.js\",\"899\",\"static/chunks/899-8d832fe7c09b2afe.js\",\"817\",\"static/chunks/817-c8c94d51d28af935.js\",\"250\",\"static/chunks/250-f21e8c1de1717077.js\",\"699\",\"static/chunks/699-b82671c2a3a100c7.js\",\"931\",\"static/chunks/app/page-0f39fb1d0bde3c55.js\"],\"default\",1]\n8:I[4707,[],\"\"]\n9:I[36423,[],\"\"]\nb:I[61060,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"0:[\"$\",\"$L4\",null,{\"buildId\":\"eKo9wJb6ZFjAy85GaOeQ6\",\"assetPrefix\":\"/ui\",\"urlParts\":[\"\",\"\"],\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[[\"$L5\",[\"$\",\"$L6\",null,{\"props\":{\"params\":{},\"searchParams\":{}},\"Component\":\"$7\"}],null],null],null]},[[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}],[\"$\",\"link\",\"1\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/61b39adb1cb75213.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}]],[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_cf7686\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI 
Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[]}]}]}]],null],null],\"couldBeIntercepted\":false,\"initialHead\":[null,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script></body></html>
|
||||
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75a5453f51d60261.js"/><script src="/ui/_next/static/chunks/fd9d1056-524b80e1a6b8bb06.js" async=""></script><script src="/ui/_next/static/chunks/117-87ec698bfca6820e.js" async=""></script><script src="/ui/_next/static/chunks/main-app-475d6efe4080647d.js" async=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-42372ed130431b0a.js" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75a5453f51d60261.js" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/a34f9d1faa5f3315-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"style\"]\n3:HL[\"/ui/_next/static/css/61b39adb1cb75213.css\",\"style\"]\n"])</script><script>self.__next_f.push([1,"4:I[12846,[],\"\"]\n6:I[19107,[],\"ClientPageRoot\"]\n7:I[37185,[\"665\",\"static/chunks/3014691f-0b72c78cfebbd712.js\",\"990\",\"static/chunks/13b76428-ebdf3012af0e4489.js\",\"42\",\"static/chunks/42-6810261f4d6c8bbf.js\",\"261\",\"static/chunks/261-d4b99bc9f53d4ef3.js\",\"899\",\"static/chunks/899-8d832fe7c09b2afe.js\",\"817\",\"static/chunks/817-c8c94d51d28af935.js\",\"250\",\"static/chunks/250-ef859ae76fcc7788.js\",\"699\",\"static/chunks/699-b82671c2a3a100c7.js\",\"931\",\"static/chunks/app/page-0f22a8aad3780430.js\"],\"default\",1]\n8:I[4707,[],\"\"]\n9:I[36423,[],\"\"]\nb:I[61060,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"0:[\"$\",\"$L4\",null,{\"buildId\":\"cxKWUnb1loD53gKRUqiFm\",\"assetPrefix\":\"/ui\",\"urlParts\":[\"\",\"\"],\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[[\"$L5\",[\"$\",\"$L6\",null,{\"props\":{\"params\":{},\"searchParams\":{}},\"Component\":\"$7\"}],null],null],null]},[[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}],[\"$\",\"link\",\"1\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/61b39adb1cb75213.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}]],[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_cf7686\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI 
Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[]}]}]}]],null],null],\"couldBeIntercepted\":false,\"initialHead\":[null,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script></body></html>
|
@ -1,7 +1,7 @@
2:I[19107,[],"ClientPageRoot"]
3:I[98621,["665","static/chunks/3014691f-0b72c78cfebbd712.js","990","static/chunks/13b76428-ebdf3012af0e4489.js","42","static/chunks/42-6810261f4d6c8bbf.js","261","static/chunks/261-d4b99bc9f53d4ef3.js","899","static/chunks/899-8d832fe7c09b2afe.js","817","static/chunks/817-c8c94d51d28af935.js","250","static/chunks/250-f21e8c1de1717077.js","699","static/chunks/699-b82671c2a3a100c7.js","931","static/chunks/app/page-0f39fb1d0bde3c55.js"],"default",1]
3:I[37185,["665","static/chunks/3014691f-0b72c78cfebbd712.js","990","static/chunks/13b76428-ebdf3012af0e4489.js","42","static/chunks/42-6810261f4d6c8bbf.js","261","static/chunks/261-d4b99bc9f53d4ef3.js","899","static/chunks/899-8d832fe7c09b2afe.js","817","static/chunks/817-c8c94d51d28af935.js","250","static/chunks/250-ef859ae76fcc7788.js","699","static/chunks/699-b82671c2a3a100c7.js","931","static/chunks/app/page-0f22a8aad3780430.js"],"default",1]
4:I[4707,[],""]
5:I[36423,[],""]
0:["eKo9wJb6ZFjAy85GaOeQ6", …]  (full RSC flight payload for the root page elided; apart from the build ID it is identical to the replacement line below)
0:["cxKWUnb1loD53gKRUqiFm", …]  (full RSC flight payload for the root page elided; apart from the build ID it is identical to the line above)
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null
File diff suppressed because one or more lines are too long
@ -1,7 +1,7 @@
2:I[19107,[],"ClientPageRoot"]
3:I[52829,["42","static/chunks/42-6810261f4d6c8bbf.js","261","static/chunks/261-d4b99bc9f53d4ef3.js","250","static/chunks/250-f21e8c1de1717077.js","699","static/chunks/699-b82671c2a3a100c7.js","418","static/chunks/app/model_hub/page-068a441595bd0fc3.js"],"default",1]
3:I[52829,["42","static/chunks/42-6810261f4d6c8bbf.js","261","static/chunks/261-d4b99bc9f53d4ef3.js","250","static/chunks/250-ef859ae76fcc7788.js","699","static/chunks/699-b82671c2a3a100c7.js","418","static/chunks/app/model_hub/page-068a441595bd0fc3.js"],"default",1]
4:I[4707,[],""]
5:I[36423,[],""]
0:["eKo9wJb6ZFjAy85GaOeQ6", …]  (full RSC flight payload for the model_hub route elided; apart from the build ID it is identical to the replacement line below)
0:["cxKWUnb1loD53gKRUqiFm", …]  (full RSC flight payload for the model_hub route elided; apart from the build ID it is identical to the line above)
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null
File diff suppressed because one or more lines are too long
@ -1,7 +1,7 @@
2:I[19107,[],"ClientPageRoot"]
3:I[12011,["665","static/chunks/3014691f-0b72c78cfebbd712.js","42","static/chunks/42-6810261f4d6c8bbf.js","899","static/chunks/899-8d832fe7c09b2afe.js","250","static/chunks/250-f21e8c1de1717077.js","461","static/chunks/app/onboarding/page-957334f0c5d5df41.js"],"default",1]
3:I[12011,["665","static/chunks/3014691f-0b72c78cfebbd712.js","42","static/chunks/42-6810261f4d6c8bbf.js","899","static/chunks/899-8d832fe7c09b2afe.js","250","static/chunks/250-ef859ae76fcc7788.js","461","static/chunks/app/onboarding/page-957334f0c5d5df41.js"],"default",1]
4:I[4707,[],""]
5:I[36423,[],""]
0:["eKo9wJb6ZFjAy85GaOeQ6", …]  (full RSC flight payload for the onboarding route elided; apart from the build ID it is identical to the replacement line below)
0:["cxKWUnb1loD53gKRUqiFm", …]  (full RSC flight payload for the onboarding route elided; apart from the build ID it is identical to the line above)
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null
@ -5,6 +5,7 @@ import { Row, Col, Typography, Card } from "antd";
import TextArea from "antd/es/input/TextArea";
import { Team } from "../key_team_helpers/key_list";
import TeamDropdown from "../common_components/team_dropdown";
import CacheControlSettings from "./cache_control_settings";
const { Link } = Typography;

interface AdvancedSettingsProps {
@ -21,6 +22,7 @@ const AdvancedSettings: React.FC<AdvancedSettingsProps> = ({
const [form] = Form.useForm();
const [customPricing, setCustomPricing] = React.useState(false);
const [pricingModel, setPricingModel] = React.useState<'per_token' | 'per_second'>('per_token');
const [showCacheControl, setShowCacheControl] = React.useState(false);

// Add validation function for numbers
const validateNumber = (_: any, value: string) => {
@ -83,6 +85,24 @@ const AdvancedSettings: React.FC<AdvancedSettingsProps> = ({
}
};

const handleCacheControlChange = (checked: boolean) => {
setShowCacheControl(checked);
if (!checked) {
const currentParams = form.getFieldValue('litellm_extra_params');
try {
let paramsObj = currentParams ? JSON.parse(currentParams) : {};
delete paramsObj.cache_control_injection_points;
if (Object.keys(paramsObj).length > 0) {
form.setFieldValue('litellm_extra_params', JSON.stringify(paramsObj, null, 2));
} else {
form.setFieldValue('litellm_extra_params', '');
}
} catch (error) {
form.setFieldValue('litellm_extra_params', '');
}
}
};

return (
<>
<Accordion className="mt-2 mb-4">
@ -150,6 +170,12 @@ const AdvancedSettings: React.FC<AdvancedSettingsProps> = ({
</div>
)}

<CacheControlSettings
form={form}
showCacheControl={showCacheControl}
onCacheControlChange={handleCacheControlChange}
/>

<Form.Item
label="Use in pass through routes"
name="use_in_pass_through"
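For reference, a minimal sketch (illustrative only, not part of this diff) of the value these handlers read and write through form.getFieldValue('litellm_extra_params'): the textarea holds a JSON string, and the cache-control UI merges a cache_control_injection_points key into it whose entries follow the { location, role?, index? } shape declared in the new component below. The concrete values here are examples.

// Illustrative sketch (TypeScript) of the litellm_extra_params payload the UI maintains.
// Field names come from the code above; the entries themselves are example values.
const paramsObj = {
  cache_control_injection_points: [
    { location: "message", role: "system" }, // cache every system message
    { location: "message", index: 2 },       // or target one specific message index
  ],
};
// Written back into the form the same way the handlers do it:
const litellmExtraParams = JSON.stringify(paramsObj, null, 2);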
@ -0,0 +1,159 @@
import React from "react";
import { Form, Switch, Select, Input, Typography } from "antd";
import { PlusOutlined, MinusCircleOutlined } from '@ant-design/icons';
import NumericalInput from "../shared/numerical_input";

const { Text } = Typography;

interface CacheControlInjectionPoint {
location: "message";
role?: "user" | "system" | "assistant";
index?: number;
}

interface CacheControlSettingsProps {
form: any; // Form instance from parent
showCacheControl: boolean;
onCacheControlChange: (checked: boolean) => void;
}

const CacheControlSettings: React.FC<CacheControlSettingsProps> = ({
form,
showCacheControl,
onCacheControlChange,
}) => {
const updateCacheControlPoints = (injectionPoints: CacheControlInjectionPoint[]) => {
const currentParams = form.getFieldValue('litellm_extra_params');
try {
let paramsObj = currentParams ? JSON.parse(currentParams) : {};
if (injectionPoints.length > 0) {
paramsObj.cache_control_injection_points = injectionPoints;
} else {
delete paramsObj.cache_control_injection_points;
}
if (Object.keys(paramsObj).length > 0) {
form.setFieldValue('litellm_extra_params', JSON.stringify(paramsObj, null, 2));
} else {
form.setFieldValue('litellm_extra_params', '');
}
} catch (error) {
console.error('Error updating cache control points:', error);
}
};

return (
<>
<Form.Item
label="Cache Control"
name="cache_control"
valuePropName="checked"
className="mb-4"
tooltip="Tell litellm where to inject cache control checkpoints. You can specify either by role (to apply to all messages of that role) or by specific message index."
>
<Switch onChange={onCacheControlChange} className="bg-gray-600" />
</Form.Item>

{showCacheControl && (
<div className="ml-6 pl-4 border-l-2 border-gray-200">
<Text className="text-sm text-gray-500 block mb-4">
Specify either a role (to cache all messages of that role) or a specific message index.
If both are provided, the index takes precedence.
</Text>

<Form.List
name="cache_control_injection_points"
initialValue={[{ location: "message" }]}
>
{(fields, { add, remove }) => (
<>
{fields.map((field, index) => (
<div key={field.key} className="flex items-center mb-4 gap-4">
<Form.Item
{...field}
label="Type"
name={[field.name, 'location']}
initialValue="message"
className="mb-0"
style={{ width: '180px' }}
>
<Select disabled options={[{ value: 'message', label: 'Message' }]} />
</Form.Item>

<Form.Item
{...field}
label="Role"
name={[field.name, 'role']}
className="mb-0"
style={{ width: '180px' }}
tooltip="Select a role to cache all messages of this type"
>
<Select
placeholder="Select a role"
allowClear
options={[
{ value: 'user', label: 'User' },
{ value: 'system', label: 'System' },
{ value: 'assistant', label: 'Assistant' },
]}
onChange={() => {
const values = form.getFieldValue('cache_control_points');
updateCacheControlPoints(values);
}}
/>
</Form.Item>

<Form.Item
{...field}
label="Index"
name={[field.name, 'index']}
className="mb-0"
style={{ width: '180px' }}
tooltip="Specify a specific message index (optional)"
>
<NumericalInput
type="number"
placeholder="Optional"
step={1}
min={0}
onChange={() => {
const values = form.getFieldValue('cache_control_points');
updateCacheControlPoints(values);
}}
/>
</Form.Item>

{fields.length > 1 && (
<MinusCircleOutlined
className="text-red-500 cursor-pointer text-lg mt-8"
onClick={() => {
remove(field.name);
setTimeout(() => {
const values = form.getFieldValue('cache_control_points');
updateCacheControlPoints(values);
}, 0);
}}
/>
)}
</div>
))}

<Form.Item>
<button
type="button"
className="flex items-center justify-center w-full border border-dashed border-gray-300 py-2 px-4 text-gray-600 hover:text-blue-600 hover:border-blue-300 transition-all rounded"
onClick={() => add()}
>
<PlusOutlined className="mr-2" />
Add Injection Point
</button>
</Form.Item>
</>
)}
</Form.List>
</div>
)}
</>
);
};

export default CacheControlSettings;
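A minimal usage sketch (assumed wiring, not part of this commit) showing how the component's props fit together with an antd form instance, mirroring how the AdvancedSettings component above mounts it; the host component name is invented for illustration.

// Hypothetical host component: the parent owns the antd form instance and the toggle state.
import React from "react";
import { Form } from "antd";
import CacheControlSettings from "./cache_control_settings";

const ExampleAdvancedSettings: React.FC = () => {
  const [form] = Form.useForm();
  const [showCacheControl, setShowCacheControl] = React.useState(false);

  return (
    <Form form={form} layout="vertical">
      {/* The switch toggles visibility; the component writes the configured
          injection points into the litellm_extra_params field. */}
      <CacheControlSettings
        form={form}
        showCacheControl={showCacheControl}
        onCacheControlChange={setShowCacheControl}
      />
    </Form>
  );
};

export default ExampleAdvancedSettings;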
@ -60,7 +60,7 @@ export const prepareModelAddRequest = async (
continue;
}
// Skip the custom_pricing and pricing_model fields as they're only used for UI control
if (key === 'custom_pricing' || key === 'pricing_model') {
if (key === 'custom_pricing' || key === 'pricing_model' || key === 'cache_control') {
continue;
}
if (key == "model_name") {
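The same guard in prepareModelAddRequest could also be expressed against a single set of UI-only keys; a refactor sketch (hypothetical, not what this commit does), with UI_ONLY_KEYS and stripUiOnlyKeys as invented names:

// Hypothetical refactor: collect the UI-only form fields in one place so a new
// toggle like cache_control only needs to be added to the set.
const UI_ONLY_KEYS = new Set(["custom_pricing", "pricing_model", "cache_control"]);

function stripUiOnlyKeys(formValues: Record<string, unknown>): Record<string, unknown> {
  const payload: Record<string, unknown> = {};
  for (const [key, value] of Object.entries(formValues)) {
    if (UI_ONLY_KEYS.has(key)) {
      continue; // never forwarded to the backend, same effect as the guard above
    }
    payload[key] = value;
  }
  return payload;
}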
@ -23,6 +23,7 @@ import { getProviderLogoAndName } from "./provider_info_helpers";
import { getDisplayModelName } from "./view_model/model_name_display";
import AddCredentialsModal from "./model_add/add_credentials_tab";
import ReuseCredentialsModal from "./model_add/reuse_credentials";
import CacheControlSettings from "./add_model/cache_control_settings";

interface ModelInfoViewProps {
modelId: string;
@ -57,6 +58,7 @@ export default function ModelInfoView({
const [isSaving, setIsSaving] = useState(false);
const [isEditing, setIsEditing] = useState(false);
const [existingCredential, setExistingCredential] = useState<CredentialItem | null>(null);
const [showCacheControl, setShowCacheControl] = useState(false);

const canEditModel = userRole === "Admin" || modelData.model_info.created_by === userID;
const isAdmin = userRole === "Admin";
@ -86,6 +88,11 @@ export default function ModelInfoView({
console.log("modelInfoResponse, ", modelInfoResponse);
let specificModelData = modelInfoResponse.data[0];
setLocalModelData(specificModelData);

// Check if cache control is enabled
if (specificModelData?.litellm_params?.cache_control_injection_points) {
setShowCacheControl(true);
}
}
getExistingCredential();
getModelInfo();
@ -112,22 +119,31 @@
if (!accessToken) return;
setIsSaving(true);

let updatedLitellmParams = {
...localModelData.litellm_params,
model: values.litellm_model_name,
api_base: values.api_base,
custom_llm_provider: values.custom_llm_provider,
organization: values.organization,
tpm: values.tpm,
rpm: values.rpm,
max_retries: values.max_retries,
timeout: values.timeout,
stream_timeout: values.stream_timeout,
input_cost_per_token: values.input_cost / 1_000_000,
output_cost_per_token: values.output_cost / 1_000_000,
};

// Handle cache control settings
if (values.cache_control && values.cache_control_injection_points?.length > 0) {
updatedLitellmParams.cache_control_injection_points = values.cache_control_injection_points;
} else {
delete updatedLitellmParams.cache_control_injection_points;
}

const updateData = {
model_name: values.model_name,
litellm_params: {
...localModelData.litellm_params,
model: values.litellm_model_name,
api_base: values.api_base,
custom_llm_provider: values.custom_llm_provider,
organization: values.organization,
tpm: values.tpm,
rpm: values.rpm,
max_retries: values.max_retries,
timeout: values.timeout,
stream_timeout: values.stream_timeout,
input_cost_per_token: values.input_cost / 1_000_000,
output_cost_per_token: values.output_cost / 1_000_000,
},
litellm_params: updatedLitellmParams,
model_info: {
id: modelId,
}
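For context, a sketch (example values, not part of this diff) of the update payload the save handler above now builds: litellm_params is the merged updatedLitellmParams object, so cache_control_injection_points appears only when the toggle is on.

// Illustrative payload shape; the model names, costs, and ID below are example values.
const exampleUpdateData = {
  model_name: "my-gpt-4o",
  litellm_params: {
    model: "azure/my-gpt-4o-deployment",
    input_cost_per_token: 5 / 1_000_000,   // the UI collects cost per 1M tokens
    output_cost_per_token: 15 / 1_000_000,
    cache_control_injection_points: [
      { location: "message", role: "system" },
    ],
  },
  model_info: {
    id: "example-model-id",
  },
};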
@ -139,7 +155,7 @@ export default function ModelInfoView({
...localModelData,
model_name: values.model_name,
litellm_model_name: values.litellm_model_name,
litellm_params: updateData.litellm_params
litellm_params: updatedLitellmParams
};

setLocalModelData(updatedModelData);
@ -337,6 +353,8 @@ export default function ModelInfoView({
(localModelData.litellm_params.input_cost_per_token * 1_000_000) : localModelData.model_info?.input_cost_per_token * 1_000_000 || null,
output_cost: localModelData.litellm_params?.output_cost_per_token ?
(localModelData.litellm_params.output_cost_per_token * 1_000_000) : localModelData.model_info?.output_cost_per_token * 1_000_000 || null,
cache_control: localModelData.litellm_params?.cache_control_injection_points ? true : false,
cache_control_injection_points: localModelData.litellm_params?.cache_control_injection_points || [],
}}
layout="vertical"
onValuesChange={() => setIsDirty(true)}
@ -499,6 +517,37 @@ export default function ModelInfoView({
)}
</div>

{/* Cache Control Section */}
{isEditing ? (
<CacheControlSettings
form={form}
showCacheControl={showCacheControl}
onCacheControlChange={(checked) => setShowCacheControl(checked)}
/>
) : (
<div>
<Text className="font-medium">Cache Control</Text>
<div className="mt-1 p-2 bg-gray-50 rounded">
{localModelData.litellm_params?.cache_control_injection_points ? (
<div>
<p>Enabled</p>
<div className="mt-2">
{localModelData.litellm_params.cache_control_injection_points.map((point: any, i: number) => (
<div key={i} className="text-sm text-gray-600 mb-1">
Location: {point.location},
{point.role && <span> Role: {point.role}</span>}
{point.index !== undefined && <span> Index: {point.index}</span>}
</div>
))}
</div>
</div>
) : (
"Disabled"
)}
</div>
</div>
)}

<div>
<Text className="font-medium">Team ID</Text>
<div className="mt-1 p-2 bg-gray-50 rounded">