litellm/litellm/types/router.py

import enum
import uuid
from typing import Dict, List, Literal, Optional, Tuple, Union

import httpx
from pydantic import BaseModel, Field, validator

from .completion import CompletionRequest
from .embedding import EmbeddingRequest


class ModelConfig(BaseModel):
    model_name: str
    litellm_params: Union[CompletionRequest, EmbeddingRequest]
    tpm: int
    rpm: int

    class Config:
        protected_namespaces = ()
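
# Illustrative usage (a minimal sketch; the model name and limits are placeholders,
# and this assumes CompletionRequest accepts a `model` field):
#
#   config = ModelConfig(
#       model_name="gpt-3.5-turbo",
#       litellm_params=CompletionRequest(model="gpt-3.5-turbo"),
#       tpm=100_000,
#       rpm=1_000,
#   )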


class RouterConfig(BaseModel):
    model_list: List[ModelConfig]
    redis_url: Optional[str] = None
    redis_host: Optional[str] = None
    redis_port: Optional[int] = None
    redis_password: Optional[str] = None
    cache_responses: Optional[bool] = False
    cache_kwargs: Optional[Dict] = {}
    caching_groups: Optional[List[Tuple[str, List[str]]]] = None
    client_ttl: Optional[int] = 3600
    num_retries: Optional[int] = 0
    timeout: Optional[float] = None
    default_litellm_params: Optional[Dict[str, str]] = {}
    set_verbose: Optional[bool] = False
    fallbacks: Optional[List] = []
    allowed_fails: Optional[int] = None
    context_window_fallbacks: Optional[List] = []
    model_group_alias: Optional[Dict[str, List[str]]] = {}
    retry_after: Optional[int] = 0
    routing_strategy: Literal[
        "simple-shuffle",
        "least-busy",
        "usage-based-routing",
        "latency-based-routing",
    ] = "simple-shuffle"

    class Config:
        protected_namespaces = ()
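
# Illustrative usage (a minimal sketch; values are placeholders):
#
#   router_config = RouterConfig(
#       model_list=[config],  # `config` from the ModelConfig example above
#       routing_strategy="least-busy",
#       num_retries=2,
#       timeout=30.0,
#   )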


class UpdateRouterConfig(BaseModel):
    """
    Set of params that you can modify via `router.update_settings()`.
    """

    routing_strategy_args: Optional[dict] = None
    routing_strategy: Optional[str] = None
    model_group_retry_policy: Optional[dict] = None
    allowed_fails: Optional[int] = None
    cooldown_time: Optional[float] = None
    num_retries: Optional[int] = None
    timeout: Optional[float] = None
    max_retries: Optional[int] = None
    retry_after: Optional[float] = None
    fallbacks: Optional[List[dict]] = None
    context_window_fallbacks: Optional[List[dict]] = None

    class Config:
        protected_namespaces = ()
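
# Illustrative usage (a minimal sketch; assumes `router` is an existing Router
# instance and that `router.update_settings()` accepts these fields as kwargs):
#
#   update = UpdateRouterConfig(num_retries=3, cooldown_time=30.0)
#   router.update_settings(**update.dict(exclude_none=True))  # use .model_dump() on pydantic v2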


class ModelInfo(BaseModel):
    # id is optional on input, but is always present as a str on the model instance
    id: Optional[str]
    # used for the proxy - separates models stored in the db vs. the config
    db_model: bool = False

    def __init__(self, id: Optional[Union[str, int]] = None, **params):
        if id is None:
            id = str(uuid.uuid4())  # generate a UUID if id is None or not provided
        elif isinstance(id, int):
            id = str(id)
        super().__init__(id=id, **params)

    class Config:
        extra = "allow"

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator
        return hasattr(self, key)

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def __setitem__(self, key, value):
        # Allow dictionary-style assignment of attributes
        setattr(self, key, value)
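
# Illustrative usage (grounded in the class above: a missing id is replaced with a
# generated UUID, an int id is cast to str, and attributes support dict-style access):
#
#   info = ModelInfo()          # info.id is a generated UUID string
#   info = ModelInfo(id=1)      # info.id == "1"
#   assert "id" in info and info["id"] == info.get("id")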


class GenericLiteLLMParams(BaseModel):
    """
    LiteLLM Params without the 'model' arg (used across the completion / assistants api)
    """

    custom_llm_provider: Optional[str] = None
    tpm: Optional[int] = None
    rpm: Optional[int] = None
    api_key: Optional[str] = None
    api_base: Optional[str] = None
    api_version: Optional[str] = None
    timeout: Optional[Union[float, str, httpx.Timeout]] = (
        None  # if str, pass in as os.environ/
    )
    stream_timeout: Optional[Union[float, str]] = (
        None  # timeout when making stream=True calls; if str, pass in as os.environ/
    )
    max_retries: Optional[int] = None
    organization: Optional[str] = None  # for openai orgs
    ## UNIFIED PROJECT/REGION ##
    region_name: Optional[str] = None
    ## VERTEX AI ##
    vertex_project: Optional[str] = None
    vertex_location: Optional[str] = None
    ## AWS BEDROCK / SAGEMAKER ##
    aws_access_key_id: Optional[str] = None
    aws_secret_access_key: Optional[str] = None
    aws_region_name: Optional[str] = None
    ## IBM WATSONX ##
    watsonx_region_name: Optional[str] = None
    ## CUSTOM PRICING ##
    input_cost_per_token: Optional[float] = None
    output_cost_per_token: Optional[float] = None
    input_cost_per_second: Optional[float] = None
    output_cost_per_second: Optional[float] = None

    def __init__(
        self,
        custom_llm_provider: Optional[str] = None,
        max_retries: Optional[Union[int, str]] = None,
        tpm: Optional[int] = None,
        rpm: Optional[int] = None,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
        api_version: Optional[str] = None,
        timeout: Optional[Union[float, str]] = None,  # if str, pass in as os.environ/
        stream_timeout: Optional[Union[float, str]] = (
            None  # timeout when making stream=True calls; if str, pass in as os.environ/
        ),
        organization: Optional[str] = None,  # for openai orgs
        ## UNIFIED PROJECT/REGION ##
        region_name: Optional[str] = None,
        ## VERTEX AI ##
        vertex_project: Optional[str] = None,
        vertex_location: Optional[str] = None,
        ## AWS BEDROCK / SAGEMAKER ##
        aws_access_key_id: Optional[str] = None,
        aws_secret_access_key: Optional[str] = None,
        aws_region_name: Optional[str] = None,
        ## IBM WATSONX ##
        watsonx_region_name: Optional[str] = None,
        input_cost_per_token: Optional[float] = None,
        output_cost_per_token: Optional[float] = None,
        input_cost_per_second: Optional[float] = None,
        output_cost_per_second: Optional[float] = None,
        **params,
    ):
        # collect all named args (minus the ones handled explicitly below) so they
        # can be forwarded to the pydantic constructor in one pass
        args = locals()
        args.pop("max_retries", None)
        args.pop("self", None)
        args.pop("params", None)
        args.pop("__class__", None)
        if max_retries is not None and isinstance(max_retries, str):
            max_retries = int(max_retries)  # cast to int
        super().__init__(max_retries=max_retries, **args, **params)

    class Config:
        extra = "allow"
        arbitrary_types_allowed = True

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator
        return hasattr(self, key)

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def __setitem__(self, key, value):
        # Allow dictionary-style assignment of attributes
        setattr(self, key, value)
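
# Illustrative usage (a minimal sketch; the key and env-var name are placeholders -
# per the field comments above, a str timeout is read as an "os.environ/" reference):
#
#   params = GenericLiteLLMParams(
#       api_key="sk-...",                      # placeholder
#       timeout="os.environ/REQUEST_TIMEOUT",  # str timeouts resolve via os.environ/
#       max_retries="3",                       # str is cast to int in __init__
#   )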


class LiteLLM_Params(GenericLiteLLMParams):
    """
    LiteLLM Params with the 'model' requirement - used for completions
    """

    model: str

    def __init__(
        self,
        model: str,
        custom_llm_provider: Optional[str] = None,
        max_retries: Optional[Union[int, str]] = None,
        tpm: Optional[int] = None,
        rpm: Optional[int] = None,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
        api_version: Optional[str] = None,
        timeout: Optional[Union[float, str]] = None,  # if str, pass in as os.environ/
        stream_timeout: Optional[Union[float, str]] = (
            None  # timeout when making stream=True calls; if str, pass in as os.environ/
        ),
        organization: Optional[str] = None,  # for openai orgs
        ## VERTEX AI ##
        vertex_project: Optional[str] = None,
        vertex_location: Optional[str] = None,
        ## AWS BEDROCK / SAGEMAKER ##
        aws_access_key_id: Optional[str] = None,
        aws_secret_access_key: Optional[str] = None,
        aws_region_name: Optional[str] = None,
        **params,
    ):
        # same locals() forwarding approach as GenericLiteLLMParams
        args = locals()
        args.pop("max_retries", None)
        args.pop("self", None)
        args.pop("params", None)
        args.pop("__class__", None)
        if max_retries is not None and isinstance(max_retries, str):
            max_retries = int(max_retries)  # cast to int
        super().__init__(max_retries=max_retries, **args, **params)

    class Config:
        extra = "allow"
        arbitrary_types_allowed = True

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator
        return hasattr(self, key)

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def __setitem__(self, key, value):
        # Allow dictionary-style assignment of attributes
        setattr(self, key, value)
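
# Illustrative usage (a minimal sketch; the deployment name, endpoint, and env-var
# name are placeholders):
#
#   llm_params = LiteLLM_Params(
#       model="azure/my-deployment",
#       api_base="https://my-endpoint.openai.azure.com",
#       api_key="os.environ/AZURE_API_KEY",
#       rpm=1_000,
#   )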


class updateLiteLLMParams(GenericLiteLLMParams):
    # This class is used to update LiteLLM_Params;
    # the only difference is that `model` is optional
    model: Optional[str] = None


class updateDeployment(BaseModel):
    model_name: Optional[str] = None
    litellm_params: Optional[updateLiteLLMParams] = None
    model_info: Optional[ModelInfo] = None

    class Config:
        protected_namespaces = ()


class Deployment(BaseModel):
    model_name: str
    litellm_params: LiteLLM_Params
    model_info: ModelInfo

    def __init__(
        self,
        model_name: str,
        litellm_params: LiteLLM_Params,
        model_info: Optional[Union[ModelInfo, dict]] = None,
        **params,
    ):
        if model_info is None:
            model_info = ModelInfo()
        elif isinstance(model_info, dict):
            model_info = ModelInfo(**model_info)
        super().__init__(
            model_info=model_info,
            model_name=model_name,
            litellm_params=litellm_params,
            **params,
        )

    def to_json(self, **kwargs):
        try:
            return self.model_dump(**kwargs)  # noqa
        except Exception:
            # fallback if using pydantic v1
            return self.dict(**kwargs)

    class Config:
        extra = "allow"
        protected_namespaces = ()

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator
        return hasattr(self, key)

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def __setitem__(self, key, value):
        # Allow dictionary-style assignment of attributes
        setattr(self, key, value)
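
# Illustrative usage (grounded in the class above: a dict model_info is coerced to
# ModelInfo, and an omitted one gets a generated UUID id):
#
#   deployment = Deployment(
#       model_name="gpt-3.5-turbo",
#       litellm_params=llm_params,  # `llm_params` from the LiteLLM_Params example above
#       model_info={"id": 1},       # coerced to ModelInfo; id becomes "1"
#   )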


class RouterErrors(enum.Enum):
    """
    Enum for router-specific errors with common codes
    """

    user_defined_ratelimit_error = "Deployment over user-defined ratelimit."
    no_deployments_available = "No deployments available for selected model"


class RetryPolicy(BaseModel):
    """
    Use this to set a custom number of retries per exception type.
    If RateLimitErrorRetries = 3, then 3 retries will be made for RateLimitError.

    Mapping of exception type to number of retries:
    https://docs.litellm.ai/docs/exception_mapping
    """

    BadRequestErrorRetries: Optional[int] = None
    AuthenticationErrorRetries: Optional[int] = None
    TimeoutErrorRetries: Optional[int] = None
    RateLimitErrorRetries: Optional[int] = None
    ContentPolicyViolationErrorRetries: Optional[int] = None
    InternalServerErrorRetries: Optional[int] = None
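
# Illustrative usage (grounded in the docstring above; the counts are placeholders):
#
#   retry_policy = RetryPolicy(
#       RateLimitErrorRetries=3,  # retry rate-limit errors up to 3 times
#       TimeoutErrorRetries=2,
#   )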


class AlertingConfig(BaseModel):
    """
    Use this to configure alerting for the router. Receive alerts on the following events:
    - LLM API Exceptions
    - LLM Responses Too Slow
    - LLM Requests Hanging

    Args:
        webhook_url: str - webhook url to send alerts to (e.g. a Slack incoming webhook)
        alerting_threshold: Optional[float] - threshold for slow / hanging llm responses (in seconds), defaults to 300
    """

    webhook_url: str
    alerting_threshold: Optional[float] = 300
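
# Illustrative usage (a minimal sketch; the webhook url is a placeholder):
#
#   alerting = AlertingConfig(
#       webhook_url="https://hooks.slack.com/services/...",
#       alerting_threshold=120.0,  # alert if a response takes longer than 2 minutes
#   )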