Merge branch 'main' into litellm_anthropic_streaming_tool_call_fix

commit 78d72acc44
Krish Dholakia, 2024-08-07 14:33:30 -07:00, committed by GitHub
13 changed files with 338 additions and 180 deletions

@@ -36,7 +36,8 @@ This covers:
 - ✅ [Tracking Spend for Custom Tags](./proxy/enterprise#tracking-spend-for-custom-tags)
 - ✅ [Exporting LLM Logs to GCS Bucket](./proxy/bucket#🪣-logging-gcs-s3-buckets)
 - ✅ [API Endpoints to get Spend Reports per Team, API Key, Customer](./proxy/cost_tracking.md#✨-enterprise-api-endpoints-to-get-spend)
-- **Advanced Metrics**
+- **Prometheus Metrics**
+- ✅ [Prometheus Metrics - Num Requests, failures, LLM Provider Outages](./proxy/prometheus)
 - ✅ [`x-ratelimit-remaining-requests`, `x-ratelimit-remaining-tokens` for LLM APIs on Prometheus](./proxy/prometheus#✨-enterprise-llm-remaining-requests-and-remaining-tokens)
 - **Guardrails, PII Masking, Content Moderation**
 - ✅ [Content Moderation with LLM Guard, LlamaGuard, Secret Detection, Google Text Moderations](./proxy/enterprise#content-moderation)

@@ -30,7 +30,8 @@ Features:
 - ✅ [Tracking Spend for Custom Tags](#tracking-spend-for-custom-tags)
 - ✅ [Exporting LLM Logs to GCS Bucket](./proxy/bucket#🪣-logging-gcs-s3-buckets)
 - ✅ [`/spend/report` API endpoint](cost_tracking.md#✨-enterprise-api-endpoints-to-get-spend)
-- **Advanced Metrics**
+- **Prometheus Metrics**
+- ✅ [Prometheus Metrics - Num Requests, failures, LLM Provider Outages](prometheus)
 - ✅ [`x-ratelimit-remaining-requests`, `x-ratelimit-remaining-tokens` for LLM APIs on Prometheus](prometheus#✨-enterprise-llm-remaining-requests-and-remaining-tokens)
 - **Guardrails, PII Masking, Content Moderation**
 - ✅ [Content Moderation with LLM Guard, LlamaGuard, Secret Detection, Google Text Moderations](#content-moderation)

@@ -1,7 +1,16 @@
 import Tabs from '@theme/Tabs';
 import TabItem from '@theme/TabItem';

-# 📈 Prometheus metrics [BETA]
+# 📈 Prometheus metrics
+
+:::info
+🚨 Prometheus Metrics will be moving to LiteLLM Enterprise on September 15th, 2024
+
+[Enterprise Pricing](https://www.litellm.ai/#pricing)
+
+[Contact us here to get a free trial](https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat)
+:::

 LiteLLM Exposes a `/metrics` endpoint for Prometheus to Poll
@@ -47,9 +56,11 @@ http://localhost:4000/metrics
 # <proxy_base_url>/metrics
 ```

-## Metrics Tracked
+## 📈 Metrics Tracked
+
+### Proxy Requests / Spend Metrics

 | Metric Name | Description |
 |----------------------|--------------------------------------|
 | `litellm_requests_metric` | Number of requests made, per `"user", "key", "model", "team", "end-user"` |
@@ -57,6 +68,19 @@ http://localhost:4000/metrics
 | `litellm_total_tokens` | input + output tokens per `"user", "key", "model", "team", "end-user"` |
 | `litellm_llm_api_failed_requests_metric` | Number of failed LLM API requests per `"user", "key", "model", "team", "end-user"` |

+### LLM API / Provider Metrics
+
+| Metric Name | Description |
+|----------------------|--------------------------------------|
+| `deployment_complete_outage` | Value is "1" when deployment is in cooldown and has had a complete outage. This metric tracks the state of the LLM API Deployment when it's completely unavailable. |
+| `deployment_partial_outage` | Value is "1" when deployment is experiencing a partial outage. This metric indicates when the LLM API Deployment is facing issues but is not completely down. |
+| `deployment_healthy` | Value is "1" when deployment is in a healthy state. This metric shows when the LLM API Deployment is functioning normally without any outages. |
+| `litellm_remaining_requests_metric` | Track `x-ratelimit-remaining-requests` returned from LLM API Deployment |
+| `litellm_remaining_tokens` | Track `x-ratelimit-remaining-tokens` return from LLM API Deployment |
+
 ### Budget Metrics

 | Metric Name | Description |
 |----------------------|--------------------------------------|
@@ -64,55 +88,6 @@ http://localhost:4000/metrics
 | `litellm_remaining_api_key_budget_metric` | Remaining Budget for API Key (A key Created on LiteLLM)|

-### ✨ (Enterprise) LLM Remaining Requests and Remaining Tokens
-Set this on your config.yaml to allow you to track how close you are to hitting your TPM / RPM limits on each model group
-
-```yaml
-litellm_settings:
-  success_callback: ["prometheus"]
-  failure_callback: ["prometheus"]
-  return_response_headers: true # ensures the LLM API calls track the response headers
-```
-
-| Metric Name | Description |
-|----------------------|--------------------------------------|
-| `litellm_remaining_requests_metric` | Track `x-ratelimit-remaining-requests` returned from LLM API Deployment |
-| `litellm_remaining_tokens` | Track `x-ratelimit-remaining-tokens` return from LLM API Deployment |
-
-Example Metric
-
-<Tabs>
-<TabItem value="Remaining Requests" label="Remaining Requests">
-
-```shell
-litellm_remaining_requests
-{
-  api_base="https://api.openai.com/v1",
-  api_provider="openai",
-  litellm_model_name="gpt-3.5-turbo",
-  model_group="gpt-3.5-turbo"
-}
-8998.0
-```
-
-</TabItem>
-
-<TabItem value="Requests" label="Remaining Tokens">
-
-```shell
-litellm_remaining_tokens
-{
-  api_base="https://api.openai.com/v1",
-  api_provider="openai",
-  litellm_model_name="gpt-3.5-turbo",
-  model_group="gpt-3.5-turbo"
-}
-999981.0
-```
-
-</TabItem>
-</Tabs>
-
 ## Monitor System Health
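Editor's note: the `/metrics` docs above assume the Prometheus callbacks are enabled on the proxy via `config.yaml`. As a hedged aside (not part of this commit), the same callbacks can also be switched on from the Python SDK; the snippet below is a minimal sketch, and the model, prompt, and environment key are placeholders.

```python
# Minimal sketch (not part of this diff): enable the Prometheus callbacks from
# the Python SDK instead of the proxy config.yaml. Assumes `litellm` and
# `prometheus_client` are installed and OPENAI_API_KEY is set in the environment.
import litellm

litellm.success_callback = ["prometheus"]  # feeds litellm_requests_metric, token/spend counters
litellm.failure_callback = ["prometheus"]  # feeds litellm_llm_api_failed_requests_metric

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "ping"}],
)
print(response.choices[0].message.content)
```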

@@ -73,6 +73,7 @@ class ServiceLogging(CustomLogger):
             )
         for callback in litellm.service_callback:
             if callback == "prometheus_system":
+                await self.init_prometheus_services_logger_if_none()
                 await self.prometheusServicesLogger.async_service_success_hook(
                     payload=payload
                 )
@@ -88,6 +89,11 @@ class ServiceLogging(CustomLogger):
                 event_metadata=event_metadata,
             )

+    async def init_prometheus_services_logger_if_none(self):
+        if self.prometheusServicesLogger is None:
+            self.prometheusServicesLogger = self.prometheusServicesLogger()
+        return
+
     async def async_service_failure_hook(
         self,
         service: ServiceTypes,
@@ -120,8 +126,7 @@ class ServiceLogging(CustomLogger):
             )
         for callback in litellm.service_callback:
             if callback == "prometheus_system":
-                if self.prometheusServicesLogger is None:
-                    self.prometheusServicesLogger = self.prometheusServicesLogger()
+                await self.init_prometheus_services_logger_if_none()
                 await self.prometheusServicesLogger.async_service_failure_hook(
                     payload=payload
                 )
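The two `_service_logger.py` hunks above factor an inline None-check into a shared `init_prometheus_services_logger_if_none()` helper that both the success and failure hooks call. Below is a self-contained sketch of that lazy-initialization pattern; the class names are illustrative stand-ins, not LiteLLM's real classes.

```python
# Illustrative lazy-init sketch; ServicesLogger / ServiceLogging are stand-ins,
# not LiteLLM's actual PrometheusServicesLogger / ServiceLogging classes.
import asyncio
from typing import Optional


class ServicesLogger:
    async def async_service_success_hook(self, payload: dict) -> None:
        print("logged:", payload)


class ServiceLogging:
    def __init__(self) -> None:
        self.services_logger: Optional[ServicesLogger] = None

    async def init_services_logger_if_none(self) -> None:
        # build the logger on first use, so both hooks share one code path
        if self.services_logger is None:
            self.services_logger = ServicesLogger()

    async def async_service_success_hook(self, payload: dict) -> None:
        await self.init_services_logger_if_none()
        await self.services_logger.async_service_success_hook(payload=payload)


asyncio.run(
    ServiceLogging().async_service_success_hook({"service": "redis", "duration": 0.01})
)
```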

@@ -8,7 +8,7 @@ import subprocess
 import sys
 import traceback
 import uuid
-from typing import Optional, Union
+from typing import Optional, TypedDict, Union

 import dotenv
 import requests  # type: ignore
@@ -28,6 +28,10 @@ class PrometheusLogger:
             from litellm.proxy.proxy_server import premium_user

+            verbose_logger.warning(
+                "🚨🚨🚨 Prometheus Metrics will be moving to LiteLLM Enterprise on September 15th, 2024.\n🚨 Contact us here to get a license https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat \n🚨 Enterprise Pricing: https://www.litellm.ai/#pricing"
+            )
+
             self.litellm_llm_api_failed_requests_metric = Counter(
                 name="litellm_llm_api_failed_requests_metric",
                 documentation="Total number of failed LLM API calls via litellm",
@@ -124,6 +128,29 @@ class PrometheusLogger:
                     "litellm_model_name",
                 ],
             )

+            # Get all keys
+            _logged_llm_labels = [
+                "litellm_model_name",
+                "model_id",
+                "api_base",
+                "api_provider",
+            ]
+
+            self.deployment_complete_outage = Gauge(
+                "deployment_complete_outage",
+                'Value is "1" when deployment is in cooldown and has had a complete outage',
+                labelnames=_logged_llm_labels,
+            )
+            self.deployment_partial_outage = Gauge(
+                "deployment_partial_outage",
+                'Value is "1" when deployment is experiencing a partial outage',
+                labelnames=_logged_llm_labels,
+            )
+            self.deployment_healthy = Gauge(
+                "deployment_healthy",
+                'Value is "1" when deployment is in an healthy state',
+                labelnames=_logged_llm_labels,
+            )
+
         except Exception as e:
             print_verbose(f"Got exception on init prometheus client {str(e)}")
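The three gauges added above (`deployment_complete_outage`, `deployment_partial_outage`, `deployment_healthy`) are plain `prometheus_client` labelled gauges. A minimal stand-alone sketch of the same pattern follows; the label values are illustrative.

```python
# Stand-alone prometheus_client sketch of a labelled gauge like the ones added
# above. Requires `pip install prometheus_client`; label values are illustrative.
from prometheus_client import Gauge, generate_latest

deployment_healthy = Gauge(
    "deployment_healthy",
    'Value is "1" when deployment is in a healthy state',
    labelnames=["litellm_model_name", "model_id", "api_base", "api_provider"],
)

# every unique label combination becomes its own time series
deployment_healthy.labels(
    "gpt-3.5-turbo", "model-1", "https://api.openai.com/v1", "openai"
).set(1)

print(generate_latest().decode())  # the text format Prometheus scrapes from /metrics
```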
@@ -243,7 +270,7 @@
             # set x-ratelimit headers
             if premium_user is True:
-                self.set_remaining_tokens_requests_metric(kwargs)
+                self.set_llm_deployment_success_metrics(kwargs)

             ### FAILURE INCREMENT ###
             if "exception" in kwargs:
@@ -256,6 +283,8 @@
                     user_api_team_alias,
                     user_id,
                 ).inc()
+
+                self.set_llm_deployment_failure_metrics(kwargs)
         except Exception as e:
             verbose_logger.error(
                 "prometheus Layer Error(): Exception occured - {}".format(str(e))
@@ -263,7 +292,33 @@
             verbose_logger.debug(traceback.format_exc())
             pass

-    def set_remaining_tokens_requests_metric(self, request_kwargs: dict):
+    def set_llm_deployment_failure_metrics(self, request_kwargs: dict):
+        try:
+            verbose_logger.debug("setting remaining tokens requests metric")
+            _response_headers = request_kwargs.get("response_headers")
+            _litellm_params = request_kwargs.get("litellm_params", {}) or {}
+            _metadata = _litellm_params.get("metadata", {})
+            litellm_model_name = request_kwargs.get("model", None)
+            api_base = _metadata.get("api_base", None)
+            llm_provider = _litellm_params.get("custom_llm_provider", None)
+            model_id = _metadata.get("model_id")
+
+            """
+            log these labels
+            ["litellm_model_name", "model_id", "api_base", "api_provider"]
+            """
+
+            self.set_deployment_partial_outage(
+                litellm_model_name=litellm_model_name,
+                model_id=model_id,
+                api_base=api_base,
+                llm_provider=llm_provider,
+            )
+            pass
+        except:
+            pass
+
+    def set_llm_deployment_success_metrics(self, request_kwargs: dict):
         try:
             verbose_logger.debug("setting remaining tokens requests metric")
             _response_headers = request_kwargs.get("response_headers")
@@ -273,6 +328,7 @@ class PrometheusLogger:
             model_group = _metadata.get("model_group", None)
             api_base = _metadata.get("api_base", None)
             llm_provider = _litellm_params.get("custom_llm_provider", None)
+            model_id = _metadata.get("model_id")

             remaining_requests = None
             remaining_tokens = None
@@ -307,14 +363,82 @@
                     model_group, llm_provider, api_base, litellm_model_name
                 ).set(remaining_tokens)

+            """
+            log these labels
+            ["litellm_model_name", "model_id", "api_base", "api_provider"]
+            """
+
+            self.set_deployment_healthy(
+                litellm_model_name=litellm_model_name,
+                model_id=model_id,
+                api_base=api_base,
+                llm_provider=llm_provider,
+            )
         except Exception as e:
             verbose_logger.error(
-                "Prometheus Error: set_remaining_tokens_requests_metric. Exception occured - {}".format(
+                "Prometheus Error: set_llm_deployment_success_metrics. Exception occured - {}".format(
                     str(e)
                 )
             )
             return

+    def set_deployment_healthy(
+        self,
+        litellm_model_name: str,
+        model_id: str,
+        api_base: str,
+        llm_provider: str,
+    ):
+        self.deployment_complete_outage.labels(
+            litellm_model_name, model_id, api_base, llm_provider
+        ).set(0)
+        self.deployment_partial_outage.labels(
+            litellm_model_name, model_id, api_base, llm_provider
+        ).set(0)
+        self.deployment_healthy.labels(
+            litellm_model_name, model_id, api_base, llm_provider
+        ).set(1)
+
+    def set_deployment_complete_outage(
+        self,
+        litellm_model_name: str,
+        model_id: str,
+        api_base: str,
+        llm_provider: str,
+    ):
+        verbose_logger.debug("setting llm outage metric")
+        self.deployment_complete_outage.labels(
+            litellm_model_name, model_id, api_base, llm_provider
+        ).set(1)
+        self.deployment_partial_outage.labels(
+            litellm_model_name, model_id, api_base, llm_provider
+        ).set(0)
+        self.deployment_healthy.labels(
+            litellm_model_name, model_id, api_base, llm_provider
+        ).set(0)
+
+    def set_deployment_partial_outage(
+        self,
+        litellm_model_name: str,
+        model_id: str,
+        api_base: str,
+        llm_provider: str,
+    ):
+        self.deployment_complete_outage.labels(
+            litellm_model_name, model_id, api_base, llm_provider
+        ).set(0)
+        self.deployment_partial_outage.labels(
+            litellm_model_name, model_id, api_base, llm_provider
+        ).set(1)
+        self.deployment_healthy.labels(
+            litellm_model_name, model_id, api_base, llm_provider
+        ).set(0)
+

 def safe_get_remaining_budget(
     max_budget: Optional[float], spend: Optional[float]

@@ -94,18 +94,14 @@ class VertexAILlama3Config:
         }

     def get_supported_openai_params(self):
-        return [
-            "max_tokens",
-            "stream",
-        ]
+        return litellm.OpenAIConfig().get_supported_openai_params(model="gpt-3.5-turbo")

     def map_openai_params(self, non_default_params: dict, optional_params: dict):
-        for param, value in non_default_params.items():
-            if param == "max_tokens":
-                optional_params["max_tokens"] = value
-            if param == "stream":
-                optional_params["stream"] = value
-        return optional_params
+        return litellm.OpenAIConfig().map_openai_params(
+            non_default_params=non_default_params,
+            optional_params=optional_params,
+            model="gpt-3.5-turbo",
+        )


 class VertexAIPartnerModels(BaseLLM):
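The `VertexAILlama3Config` change above replaces a hand-rolled two-parameter map with delegation to `litellm.OpenAIConfig()`. A hedged usage sketch of that delegation follows; the exact lists and mappings returned depend on the installed litellm version.

```python
# Hedged sketch of the OpenAIConfig delegation shown above; printed values
# depend on the installed litellm version.
import litellm

config = litellm.OpenAIConfig()

# parameters the OpenAI config layer accepts for an OpenAI-compatible chat model
print(config.get_supported_openai_params(model="gpt-3.5-turbo"))

# translate caller-supplied OpenAI params into provider optional_params
optional_params = config.map_openai_params(
    non_default_params={"max_tokens": 256, "stream": True, "temperature": 0.2},
    optional_params={},
    model="gpt-3.5-turbo",
)
print(optional_params)  # expected to contain max_tokens, stream, temperature
```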

@@ -1856,17 +1856,18 @@ def completion(
             )
         openrouter_site_url = get_secret("OR_SITE_URL") or "https://litellm.ai"
         openrouter_app_name = get_secret("OR_APP_NAME") or "liteLLM"
-        headers = (
-            headers
-            or litellm.headers
-            or {
-                "HTTP-Referer": openrouter_site_url,
-                "X-Title": openrouter_app_name,
-            }
-        )
+        openrouter_headers = {
+            "HTTP-Referer": openrouter_site_url,
+            "X-Title": openrouter_app_name,
+        }
+
+        _headers = headers or litellm.headers
+        if _headers:
+            openrouter_headers.update(_headers)
+
+        headers = openrouter_headers

         ## Load Config
         config = openrouter.OpenrouterConfig.get_config()
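The rewritten OpenRouter block above changes behavior: previously, caller-supplied `headers` replaced the default `HTTP-Referer` / `X-Title` headers entirely; now the defaults are built first and the caller's headers are merged on top. A minimal sketch of that merge order, with placeholder values:

```python
# Minimal sketch of the header-merge order introduced above; values are placeholders.
openrouter_site_url = "https://litellm.ai"
openrouter_app_name = "liteLLM"

openrouter_headers = {
    "HTTP-Referer": openrouter_site_url,
    "X-Title": openrouter_app_name,
}

user_headers = {"X-Title": "my-app", "Authorization": "Bearer sk-placeholder"}
if user_headers:
    openrouter_headers.update(user_headers)  # caller keys override the defaults

print(openrouter_headers)
# {'HTTP-Referer': 'https://litellm.ai', 'X-Title': 'my-app', 'Authorization': 'Bearer sk-placeholder'}
```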

@@ -1,7 +1,14 @@
 model_list:
-  - model_name: "*"
+  - model_name: "gpt-3.5-turbo"
     litellm_params:
-      model: "*"
+      model: "gpt-3.5-turbo"
+  - model_name: "gpt-4"
+    litellm_params:
+      model: "gpt-4"
+      api_key: "bad_key"
+  - model_name: "gpt-4o"
+    litellm_params:
+      model: "gpt-4o"

 litellm_settings:
-  callbacks: ["lakera_prompt_injection"]
+  fallbacks: [{"gpt-3.5-turbo": ["gpt-4", "gpt-4o"]}]

@@ -3,7 +3,7 @@ model_list:
     litellm_params:
       model: openai/fake
       api_key: fake-key
-      api_base: https://exampleopenaiendpoint-production.up.railway.app/
+      api_base: https://exampleopenaiendpoint-production.up.railwaz.app/
   - model_name: fireworks-llama-v3-70b-instruct
     litellm_params:
       model: fireworks_ai/accounts/fireworks/models/llama-v3-70b-instruct
@@ -50,4 +50,6 @@ general_settings:

 litellm_settings:
   callbacks: ["otel"] # 👈 KEY CHANGE
+  success_callback: ["prometheus"]
+  failure_callback: ["prometheus"]

@@ -57,6 +57,7 @@ from litellm.router_utils.client_initalization_utils import (
     set_client,
     should_initialize_sync_client,
 )
+from litellm.router_utils.cooldown_callbacks import router_cooldown_handler
 from litellm.router_utils.handle_error import send_llm_exception_alert
 from litellm.scheduler import FlowItem, Scheduler
 from litellm.types.llms.openai import (
@@ -2316,8 +2317,10 @@
             )
         try:
             if mock_testing_fallbacks is not None and mock_testing_fallbacks is True:
-                raise Exception(
-                    f"This is a mock exception for model={model_group}, to trigger a fallback. Fallbacks={fallbacks}"
+                raise litellm.InternalServerError(
+                    model=model_group,
+                    llm_provider="",
+                    message=f"This is a mock exception for model={model_group}, to trigger a fallback. Fallbacks={fallbacks}",
                 )
             elif (
                 mock_testing_context_fallbacks is not None
@@ -2347,6 +2350,7 @@
             verbose_router_logger.debug(f"Traceback{traceback.format_exc()}")
             original_exception = e
             fallback_model_group = None
+            fallback_failure_exception_str = ""
             try:
                 verbose_router_logger.debug("Trying to fallback b/w models")
                 if (
@@ -2505,6 +2509,7 @@
                         await self._async_get_cooldown_deployments_with_debug_info(),
                     )
                 )
+                fallback_failure_exception_str = str(new_exception)

             if hasattr(original_exception, "message"):
                 # add the available fallbacks to the exception
@@ -2512,6 +2517,13 @@
                     model_group,
                     fallback_model_group,
                 )
+                if len(fallback_failure_exception_str) > 0:
+                    original_exception.message += (
+                        "\nError doing the fallback: {}".format(
+                            fallback_failure_exception_str
+                        )
+                    )
+
             raise original_exception

     async def async_function_with_retries(self, *args, **kwargs):
@@ -3294,10 +3306,14 @@
                     value=cached_value, key=cooldown_key, ttl=cooldown_time
                 )

-                self.send_deployment_cooldown_alert(
-                    deployment_id=deployment,
-                    exception_status=exception_status,
-                    cooldown_time=cooldown_time,
+                # Trigger cooldown handler
+                asyncio.create_task(
+                    router_cooldown_handler(
+                        litellm_router_instance=self,
+                        deployment_id=deployment,
+                        exception_status=exception_status,
+                        cooldown_time=cooldown_time,
+                    )
                 )
             else:
                 self.failed_calls.set_cache(
@@ -4948,42 +4964,6 @@
             )
             print("\033[94m\nInitialized Alerting for litellm.Router\033[0m\n")  # noqa

-    def send_deployment_cooldown_alert(
-        self,
-        deployment_id: str,
-        exception_status: Union[str, int],
-        cooldown_time: float,
-    ):
-        try:
-            from litellm.proxy.proxy_server import proxy_logging_obj
-
-            # trigger slack alert saying deployment is in cooldown
-            if (
-                proxy_logging_obj is not None
-                and proxy_logging_obj.alerting is not None
-                and "slack" in proxy_logging_obj.alerting
-            ):
-                _deployment = self.get_deployment(model_id=deployment_id)
-                if _deployment is None:
-                    return
-
-                _litellm_params = _deployment["litellm_params"]
-                temp_litellm_params = copy.deepcopy(_litellm_params)
-                temp_litellm_params = dict(temp_litellm_params)
-                _model_name = _deployment.get("model_name", None)
-                _api_base = litellm.get_api_base(
-                    model=_model_name, optional_params=temp_litellm_params
-                )
-                # asyncio.create_task(
-                #     proxy_logging_obj.slack_alerting_instance.send_alert(
-                #         message=f"Router: Cooling down Deployment:\nModel Name: `{_model_name}`\nAPI Base: `{_api_base}`\nCooldown Time: `{cooldown_time} seconds`\nException Status Code: `{str(exception_status)}`\n\nChange 'cooldown_time' + 'allowed_fails' under 'Router Settings' on proxy UI, or via config - https://docs.litellm.ai/docs/proxy/reliability#fallbacks--retries--timeouts--cooldowns",
-                #         alert_type="cooldown_deployment",
-                #         level="Low",
-                #     )
-                # )
-        except Exception as e:
-            pass
-
     def set_custom_routing_strategy(
         self, CustomRoutingStrategy: CustomRoutingStrategyBase
     ):

@@ -0,0 +1,51 @@
+"""
+Callbacks triggered on cooling down deployments
+"""
+
+import copy
+from typing import TYPE_CHECKING, Any, Union
+
+import litellm
+from litellm._logging import verbose_logger
+
+if TYPE_CHECKING:
+    from litellm.router import Router as _Router
+
+    LitellmRouter = _Router
+else:
+    LitellmRouter = Any
+
+
+async def router_cooldown_handler(
+    litellm_router_instance: LitellmRouter,
+    deployment_id: str,
+    exception_status: Union[str, int],
+    cooldown_time: float,
+):
+    _deployment = litellm_router_instance.get_deployment(model_id=deployment_id)
+    if _deployment is None:
+        verbose_logger.warning(
+            f"in router_cooldown_handler but _deployment is None for deployment_id={deployment_id}. Doing nothing"
+        )
+        return
+    _litellm_params = _deployment["litellm_params"]
+    temp_litellm_params = copy.deepcopy(_litellm_params)
+    temp_litellm_params = dict(temp_litellm_params)
+    _model_name = _deployment.get("model_name", None)
+    _api_base = litellm.get_api_base(
+        model=_model_name, optional_params=temp_litellm_params
+    )
+    model_info = _deployment["model_info"]
+    model_id = model_info.id
+
+    # Trigger cooldown on Prometheus
+    from litellm.litellm_core_utils.litellm_logging import prometheusLogger
+
+    if prometheusLogger is not None:
+        prometheusLogger.set_deployment_complete_outage(
+            litellm_model_name=_model_name,
+            model_id=model_id,
+            api_base="",
+            llm_provider="",
+        )
+
+    pass
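`router_cooldown_handler` above is scheduled from the router with `asyncio.create_task(...)` so the request path is not blocked while the Prometheus outage gauge gets set. A self-contained sketch of that fire-and-forget pattern follows; the handler body here is illustrative, not LiteLLM's.

```python
# Fire-and-forget sketch of the pattern used above; the handler body is
# illustrative and does not call LiteLLM's real Prometheus logger.
import asyncio
from typing import Union


async def cooldown_handler(
    deployment_id: str, exception_status: Union[str, int], cooldown_time: float
) -> None:
    # in LiteLLM this is where set_deployment_complete_outage() would be called
    print(f"cooling down {deployment_id} for {cooldown_time}s (status={exception_status})")


async def main() -> None:
    # schedule the handler without awaiting it inline on the hot path
    task = asyncio.create_task(cooldown_handler("model-1", 429, 60.0))
    await task  # awaited here only so the toy example finishes cleanly


asyncio.run(main())
```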

@@ -892,47 +892,51 @@ def test_completion_claude_3_base64():
     "model", ["gemini/gemini-1.5-flash"]  # "claude-3-sonnet-20240229",
 )
 def test_completion_function_plus_image(model):
-    litellm.set_verbose = True
-    image_content = [
-        {"type": "text", "text": "Whats in this image?"},
-        {
-            "type": "image_url",
-            "image_url": {
-                "url": "https://litellm-listing.s3.amazonaws.com/litellm_logo.png"
-            },
-        },
-    ]
-    image_message = {"role": "user", "content": image_content}
-    tools = [
-        {
-            "type": "function",
-            "function": {
-                "name": "get_current_weather",
-                "description": "Get the current weather in a given location",
-                "parameters": {
-                    "type": "object",
-                    "properties": {
-                        "location": {
-                            "type": "string",
-                            "description": "The city and state, e.g. San Francisco, CA",
-                        },
-                        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
-                    },
-                    "required": ["location"],
-                },
-            },
-        }
-    ]
-    tool_choice = {"type": "function", "function": {"name": "get_current_weather"}}
-    messages = [
-        {
-            "role": "user",
-            "content": "What's the weather like in Boston today in Fahrenheit?",
-        }
-    ]
+    try:
+        litellm.set_verbose = True
+        image_content = [
+            {"type": "text", "text": "Whats in this image?"},
+            {
+                "type": "image_url",
+                "image_url": {
+                    "url": "https://litellm-listing.s3.amazonaws.com/litellm_logo.png"
+                },
+            },
+        ]
+        image_message = {"role": "user", "content": image_content}
+        tools = [
+            {
+                "type": "function",
+                "function": {
+                    "name": "get_current_weather",
+                    "description": "Get the current weather in a given location",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "location": {
+                                "type": "string",
+                                "description": "The city and state, e.g. San Francisco, CA",
+                            },
+                            "unit": {
+                                "type": "string",
+                                "enum": ["celsius", "fahrenheit"],
+                            },
+                        },
+                        "required": ["location"],
+                    },
+                },
+            }
+        ]
+        tool_choice = {"type": "function", "function": {"name": "get_current_weather"}}
+        messages = [
+            {
+                "role": "user",
+                "content": "What's the weather like in Boston today in Fahrenheit?",
+            }
+        ]

-    try:
-        response = completion(
+        try:
+            response = completion(
@@ -4088,9 +4092,28 @@ async def test_acompletion_gemini():
 def test_completion_deepseek():
     litellm.set_verbose = True
     model_name = "deepseek/deepseek-chat"
-    messages = [{"role": "user", "content": "Hey, how's it going?"}]
+    tools = [
+        {
+            "type": "function",
+            "function": {
+                "name": "get_weather",
+                "description": "Get weather of an location, the user shoud supply a location first",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "location": {
+                            "type": "string",
+                            "description": "The city and state, e.g. San Francisco, CA",
+                        }
+                    },
+                    "required": ["location"],
+                },
+            },
+        },
+    ]
+    messages = [{"role": "user", "content": "How's the weather in Hangzhou?"}]
     try:
-        response = completion(model=model_name, messages=messages)
+        response = completion(model=model_name, messages=messages, tools=tools)
         # Add any assertions here to check the response
         print(response)
     except litellm.APIError as e:

@@ -3536,22 +3536,11 @@ def get_optional_params(
         )
         _check_valid_arg(supported_params=supported_params)
-        if frequency_penalty is not None:
-            optional_params["frequency_penalty"] = frequency_penalty
-        if max_tokens is not None:
-            optional_params["max_tokens"] = max_tokens
-        if presence_penalty is not None:
-            optional_params["presence_penalty"] = presence_penalty
-        if stop is not None:
-            optional_params["stop"] = stop
-        if stream is not None:
-            optional_params["stream"] = stream
-        if temperature is not None:
-            optional_params["temperature"] = temperature
-        if logprobs is not None:
-            optional_params["logprobs"] = logprobs
-        if top_logprobs is not None:
-            optional_params["top_logprobs"] = top_logprobs
+        optional_params = litellm.OpenAIConfig().map_openai_params(
+            non_default_params=non_default_params,
+            optional_params=optional_params,
+            model=model,
+        )
     elif custom_llm_provider == "openrouter":
         supported_params = get_supported_openai_params(
             model=model, custom_llm_provider=custom_llm_provider
@@ -4141,12 +4130,15 @@
             "frequency_penalty",
             "max_tokens",
             "presence_penalty",
+            "response_format",
             "stop",
             "stream",
             "temperature",
             "top_p",
             "logprobs",
             "top_logprobs",
+            "tools",
+            "tool_choice",
         ]
     elif custom_llm_provider == "cohere":
         return [
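With `response_format`, `tools`, and `tool_choice` added to the deepseek list above, callers can check the advertised parameters through the public helper. A hedged sketch follows; the exact list depends on the installed litellm version.

```python
# Hedged sketch: inspecting the advertised deepseek params after this change.
# The exact list depends on the installed litellm version.
from litellm import get_supported_openai_params

params = get_supported_openai_params(
    model="deepseek-chat", custom_llm_provider="deepseek"
)
print("tools" in params, "tool_choice" in params, "response_format" in params)
```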