diff --git a/docs/my-website/docs/enterprise.md b/docs/my-website/docs/enterprise.md index fc85333b58..19e45bebf0 100644 --- a/docs/my-website/docs/enterprise.md +++ b/docs/my-website/docs/enterprise.md @@ -36,7 +36,8 @@ This covers: - ✅ [Tracking Spend for Custom Tags](./proxy/enterprise#tracking-spend-for-custom-tags) - ✅ [Exporting LLM Logs to GCS Bucket](./proxy/bucket#🪣-logging-gcs-s3-buckets) - ✅ [API Endpoints to get Spend Reports per Team, API Key, Customer](./proxy/cost_tracking.md#✨-enterprise-api-endpoints-to-get-spend) - - **Advanced Metrics** + - **Prometheus Metrics** + - ✅ [Prometheus Metrics - Num Requests, failures, LLM Provider Outages](./proxy/prometheus) - ✅ [`x-ratelimit-remaining-requests`, `x-ratelimit-remaining-tokens` for LLM APIs on Prometheus](./proxy/prometheus#✨-enterprise-llm-remaining-requests-and-remaining-tokens) - **Guardrails, PII Masking, Content Moderation** - ✅ [Content Moderation with LLM Guard, LlamaGuard, Secret Detection, Google Text Moderations](./proxy/enterprise#content-moderation) diff --git a/docs/my-website/docs/proxy/enterprise.md b/docs/my-website/docs/proxy/enterprise.md index d602756812..33a899222b 100644 --- a/docs/my-website/docs/proxy/enterprise.md +++ b/docs/my-website/docs/proxy/enterprise.md @@ -30,7 +30,8 @@ Features: - ✅ [Tracking Spend for Custom Tags](#tracking-spend-for-custom-tags) - ✅ [Exporting LLM Logs to GCS Bucket](./proxy/bucket#🪣-logging-gcs-s3-buckets) - ✅ [`/spend/report` API endpoint](cost_tracking.md#✨-enterprise-api-endpoints-to-get-spend) -- **Advanced Metrics** +- **Prometheus Metrics** + - ✅ [Prometheus Metrics - Num Requests, failures, LLM Provider Outages](prometheus) - ✅ [`x-ratelimit-remaining-requests`, `x-ratelimit-remaining-tokens` for LLM APIs on Prometheus](prometheus#✨-enterprise-llm-remaining-requests-and-remaining-tokens) - **Guardrails, PII Masking, Content Moderation** - ✅ [Content Moderation with LLM Guard, LlamaGuard, Secret Detection, Google 
Text Moderations](#content-moderation) diff --git a/docs/my-website/docs/proxy/prometheus.md b/docs/my-website/docs/proxy/prometheus.md index 61d1397ac2..e61ccb1d65 100644 --- a/docs/my-website/docs/proxy/prometheus.md +++ b/docs/my-website/docs/proxy/prometheus.md @@ -1,7 +1,16 @@ import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; -# 📈 Prometheus metrics [BETA] +# 📈 Prometheus metrics + +:::info +🚨 Prometheus Metrics will be moving to LiteLLM Enterprise on September 15th, 2024 + +[Enterprise Pricing](https://www.litellm.ai/#pricing) + +[Contact us here to get a free trial](https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat) + +::: LiteLLM Exposes a `/metrics` endpoint for Prometheus to Poll @@ -47,9 +56,11 @@ http://localhost:4000/metrics # /metrics ``` -## Metrics Tracked +## 📈 Metrics Tracked +### Proxy Requests / Spend Metrics + | Metric Name | Description | |----------------------|--------------------------------------| | `litellm_requests_metric` | Number of requests made, per `"user", "key", "model", "team", "end-user"` | @@ -57,6 +68,19 @@ http://localhost:4000/metrics | `litellm_total_tokens` | input + output tokens per `"user", "key", "model", "team", "end-user"` | | `litellm_llm_api_failed_requests_metric` | Number of failed LLM API requests per `"user", "key", "model", "team", "end-user"` | +### LLM API / Provider Metrics + +| Metric Name | Description | +|----------------------|--------------------------------------| +| `deployment_complete_outage` | Value is "1" when deployment is in cooldown and has had a complete outage. This metric tracks the state of the LLM API Deployment when it's completely unavailable. | +| `deployment_partial_outage` | Value is "1" when deployment is experiencing a partial outage. This metric indicates when the LLM API Deployment is facing issues but is not completely down. | +| `deployment_healthy` | Value is "1" when deployment is in a healthy state. 
This metric shows when the LLM API Deployment is functioning normally without any outages. | +| `litellm_remaining_requests_metric` | Track `x-ratelimit-remaining-requests` returned from LLM API Deployment | +| `litellm_remaining_tokens` | Track `x-ratelimit-remaining-tokens` returned from LLM API Deployment | + + + + ### Budget Metrics | Metric Name | Description | |----------------------|--------------------------------------| @@ -64,55 +88,6 @@ http://localhost:4000/metrics | `litellm_remaining_api_key_budget_metric` | Remaining Budget for API Key (A key Created on LiteLLM)| -### ✨ (Enterprise) LLM Remaining Requests and Remaining Tokens -Set this on your config.yaml to allow you to track how close you are to hitting your TPM / RPM limits on each model group - -```yaml -litellm_settings: - success_callback: ["prometheus"] - failure_callback: ["prometheus"] - return_response_headers: true # ensures the LLM API calls track the response headers -``` - -| Metric Name | Description | -|----------------------|--------------------------------------| -| `litellm_remaining_requests_metric` | Track `x-ratelimit-remaining-requests` returned from LLM API Deployment | -| `litellm_remaining_tokens` | Track `x-ratelimit-remaining-tokens` return from LLM API Deployment | - -Example Metric - - - - -```shell -litellm_remaining_requests -{ - api_base="https://api.openai.com/v1", - api_provider="openai", - litellm_model_name="gpt-3.5-turbo", - model_group="gpt-3.5-turbo" -} -8998.0 -``` - - - - - -```shell -litellm_remaining_tokens -{ - api_base="https://api.openai.com/v1", - api_provider="openai", - litellm_model_name="gpt-3.5-turbo", - model_group="gpt-3.5-turbo" -} -999981.0 -``` - - - - ## Monitor System Health diff --git a/litellm/_service_logger.py b/litellm/_service_logger.py index da0c99aac3..5e9ab03cf4 100644 --- a/litellm/_service_logger.py +++ b/litellm/_service_logger.py @@ -73,6 +73,7 @@ class ServiceLogging(CustomLogger): ) for callback in litellm.service_callback: if 
callback == "prometheus_system": + await self.init_prometheus_services_logger_if_none() await self.prometheusServicesLogger.async_service_success_hook( payload=payload ) @@ -88,6 +89,11 @@ class ServiceLogging(CustomLogger): event_metadata=event_metadata, ) + async def init_prometheus_services_logger_if_none(self): + if self.prometheusServicesLogger is None: + self.prometheusServicesLogger = self.prometheusServicesLogger() + return + async def async_service_failure_hook( self, service: ServiceTypes, @@ -120,8 +126,7 @@ class ServiceLogging(CustomLogger): ) for callback in litellm.service_callback: if callback == "prometheus_system": - if self.prometheusServicesLogger is None: - self.prometheusServicesLogger = self.prometheusServicesLogger() + await self.init_prometheus_services_logger_if_none() await self.prometheusServicesLogger.async_service_failure_hook( payload=payload ) diff --git a/litellm/integrations/prometheus.py b/litellm/integrations/prometheus.py index 4a271d6e00..61f4ff02a6 100644 --- a/litellm/integrations/prometheus.py +++ b/litellm/integrations/prometheus.py @@ -8,7 +8,7 @@ import subprocess import sys import traceback import uuid -from typing import Optional, Union +from typing import Optional, TypedDict, Union import dotenv import requests # type: ignore @@ -28,6 +28,10 @@ class PrometheusLogger: from litellm.proxy.proxy_server import premium_user + verbose_logger.warning( + "🚨🚨🚨 Prometheus Metrics will be moving to LiteLLM Enterprise on September 15th, 2024.\n🚨 Contact us here to get a license https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat \n🚨 Enterprise Pricing: https://www.litellm.ai/#pricing" + ) + self.litellm_llm_api_failed_requests_metric = Counter( name="litellm_llm_api_failed_requests_metric", documentation="Total number of failed LLM API calls via litellm", @@ -124,6 +128,29 @@ class PrometheusLogger: "litellm_model_name", ], ) + # Get all keys + _logged_llm_labels = [ + "litellm_model_name", + "model_id", + "api_base", + 
"api_provider", + ] + + self.deployment_complete_outage = Gauge( + "deployment_complete_outage", + 'Value is "1" when deployment is in cooldown and has had a complete outage', + labelnames=_logged_llm_labels, + ) + self.deployment_partial_outage = Gauge( + "deployment_partial_outage", + 'Value is "1" when deployment is experiencing a partial outage', + labelnames=_logged_llm_labels, + ) + self.deployment_healthy = Gauge( + "deployment_healthy", + 'Value is "1" when deployment is in an healthy state', + labelnames=_logged_llm_labels, + ) except Exception as e: print_verbose(f"Got exception on init prometheus client {str(e)}") @@ -243,7 +270,7 @@ class PrometheusLogger: # set x-ratelimit headers if premium_user is True: - self.set_remaining_tokens_requests_metric(kwargs) + self.set_llm_deployment_success_metrics(kwargs) ### FAILURE INCREMENT ### if "exception" in kwargs: @@ -256,6 +283,8 @@ class PrometheusLogger: user_api_team_alias, user_id, ).inc() + + self.set_llm_deployment_failure_metrics(kwargs) except Exception as e: verbose_logger.error( "prometheus Layer Error(): Exception occured - {}".format(str(e)) @@ -263,7 +292,33 @@ class PrometheusLogger: verbose_logger.debug(traceback.format_exc()) pass - def set_remaining_tokens_requests_metric(self, request_kwargs: dict): + def set_llm_deployment_failure_metrics(self, request_kwargs: dict): + try: + verbose_logger.debug("setting remaining tokens requests metric") + _response_headers = request_kwargs.get("response_headers") + _litellm_params = request_kwargs.get("litellm_params", {}) or {} + _metadata = _litellm_params.get("metadata", {}) + litellm_model_name = request_kwargs.get("model", None) + api_base = _metadata.get("api_base", None) + llm_provider = _litellm_params.get("custom_llm_provider", None) + model_id = _metadata.get("model_id") + + """ + log these labels + ["litellm_model_name", "model_id", "api_base", "api_provider"] + """ + self.set_deployment_partial_outage( + litellm_model_name=litellm_model_name, 
+ model_id=model_id, + api_base=api_base, + llm_provider=llm_provider, + ) + + pass + except: + pass + + def set_llm_deployment_success_metrics(self, request_kwargs: dict): try: verbose_logger.debug("setting remaining tokens requests metric") _response_headers = request_kwargs.get("response_headers") @@ -273,6 +328,7 @@ class PrometheusLogger: model_group = _metadata.get("model_group", None) api_base = _metadata.get("api_base", None) llm_provider = _litellm_params.get("custom_llm_provider", None) + model_id = _metadata.get("model_id") remaining_requests = None remaining_tokens = None @@ -307,14 +363,82 @@ class PrometheusLogger: model_group, llm_provider, api_base, litellm_model_name ).set(remaining_tokens) + """ + log these labels + ["litellm_model_name", "model_id", "api_base", "api_provider"] + """ + self.set_deployment_healthy( + litellm_model_name=litellm_model_name, + model_id=model_id, + api_base=api_base, + llm_provider=llm_provider, + ) except Exception as e: verbose_logger.error( - "Prometheus Error: set_remaining_tokens_requests_metric. Exception occured - {}".format( + "Prometheus Error: set_llm_deployment_success_metrics. 
Exception occured - {}".format( str(e) ) ) return + def set_deployment_healthy( + self, + litellm_model_name: str, + model_id: str, + api_base: str, + llm_provider: str, + ): + self.deployment_complete_outage.labels( + litellm_model_name, model_id, api_base, llm_provider + ).set(0) + + self.deployment_partial_outage.labels( + litellm_model_name, model_id, api_base, llm_provider + ).set(0) + + self.deployment_healthy.labels( + litellm_model_name, model_id, api_base, llm_provider + ).set(1) + + def set_deployment_complete_outage( + self, + litellm_model_name: str, + model_id: str, + api_base: str, + llm_provider: str, + ): + verbose_logger.debug("setting llm outage metric") + self.deployment_complete_outage.labels( + litellm_model_name, model_id, api_base, llm_provider + ).set(1) + + self.deployment_partial_outage.labels( + litellm_model_name, model_id, api_base, llm_provider + ).set(0) + + self.deployment_healthy.labels( + litellm_model_name, model_id, api_base, llm_provider + ).set(0) + + def set_deployment_partial_outage( + self, + litellm_model_name: str, + model_id: str, + api_base: str, + llm_provider: str, + ): + self.deployment_complete_outage.labels( + litellm_model_name, model_id, api_base, llm_provider + ).set(0) + + self.deployment_partial_outage.labels( + litellm_model_name, model_id, api_base, llm_provider + ).set(1) + + self.deployment_healthy.labels( + litellm_model_name, model_id, api_base, llm_provider + ).set(0) + def safe_get_remaining_budget( max_budget: Optional[float], spend: Optional[float] diff --git a/litellm/llms/vertex_ai_partner.py b/litellm/llms/vertex_ai_partner.py index 08780be765..378ee7290d 100644 --- a/litellm/llms/vertex_ai_partner.py +++ b/litellm/llms/vertex_ai_partner.py @@ -94,18 +94,14 @@ class VertexAILlama3Config: } def get_supported_openai_params(self): - return [ - "max_tokens", - "stream", - ] + return litellm.OpenAIConfig().get_supported_openai_params(model="gpt-3.5-turbo") def map_openai_params(self, non_default_params: 
dict, optional_params: dict): - for param, value in non_default_params.items(): - if param == "max_tokens": - optional_params["max_tokens"] = value - if param == "stream": - optional_params["stream"] = value - return optional_params + return litellm.OpenAIConfig().map_openai_params( + non_default_params=non_default_params, + optional_params=optional_params, + model="gpt-3.5-turbo", + ) class VertexAIPartnerModels(BaseLLM): diff --git a/litellm/main.py b/litellm/main.py index 0fb26b9c12..dcb3642505 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -1856,17 +1856,18 @@ def completion( ) openrouter_site_url = get_secret("OR_SITE_URL") or "https://litellm.ai" - openrouter_app_name = get_secret("OR_APP_NAME") or "liteLLM" - headers = ( - headers - or litellm.headers - or { - "HTTP-Referer": openrouter_site_url, - "X-Title": openrouter_app_name, - } - ) + openrouter_headers = { + "HTTP-Referer": openrouter_site_url, + "X-Title": openrouter_app_name, + } + + _headers = headers or litellm.headers + if _headers: + openrouter_headers.update(_headers) + + headers = openrouter_headers ## Load Config config = openrouter.OpenrouterConfig.get_config() diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml index 1fdcc5e937..f00d5ec3e7 100644 --- a/litellm/proxy/_new_secret_config.yaml +++ b/litellm/proxy/_new_secret_config.yaml @@ -1,7 +1,14 @@ model_list: - - model_name: "*" + - model_name: "gpt-3.5-turbo" litellm_params: - model: "*" + model: "gpt-3.5-turbo" + - model_name: "gpt-4" + litellm_params: + model: "gpt-4" + api_key: "bad_key" + - model_name: "gpt-4o" + litellm_params: + model: "gpt-4o" litellm_settings: - callbacks: ["lakera_prompt_injection"] + fallbacks: [{"gpt-3.5-turbo": ["gpt-4", "gpt-4o"]}] diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml index 97cd407d32..36b191c90a 100644 --- a/litellm/proxy/proxy_config.yaml +++ b/litellm/proxy/proxy_config.yaml @@ -3,7 +3,7 @@ model_list: litellm_params: 
model: openai/fake api_key: fake-key - api_base: https://exampleopenaiendpoint-production.up.railway.app/ + api_base: https://exampleopenaiendpoint-production.up.railwaz.app/ - model_name: fireworks-llama-v3-70b-instruct litellm_params: model: fireworks_ai/accounts/fireworks/models/llama-v3-70b-instruct @@ -50,4 +50,6 @@ general_settings: litellm_settings: - callbacks: ["otel"] # 👈 KEY CHANGE \ No newline at end of file + callbacks: ["otel"] # 👈 KEY CHANGE + success_callback: ["prometheus"] + failure_callback: ["prometheus"] \ No newline at end of file diff --git a/litellm/router.py b/litellm/router.py index aa9768ba44..5a4d83885f 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -57,6 +57,7 @@ from litellm.router_utils.client_initalization_utils import ( set_client, should_initialize_sync_client, ) +from litellm.router_utils.cooldown_callbacks import router_cooldown_handler from litellm.router_utils.handle_error import send_llm_exception_alert from litellm.scheduler import FlowItem, Scheduler from litellm.types.llms.openai import ( @@ -2316,8 +2317,10 @@ class Router: ) try: if mock_testing_fallbacks is not None and mock_testing_fallbacks is True: - raise Exception( - f"This is a mock exception for model={model_group}, to trigger a fallback. Fallbacks={fallbacks}" + raise litellm.InternalServerError( + model=model_group, + llm_provider="", + message=f"This is a mock exception for model={model_group}, to trigger a fallback. 
Fallbacks={fallbacks}", ) elif ( mock_testing_context_fallbacks is not None @@ -2347,6 +2350,7 @@ class Router: verbose_router_logger.debug(f"Traceback{traceback.format_exc()}") original_exception = e fallback_model_group = None + fallback_failure_exception_str = "" try: verbose_router_logger.debug("Trying to fallback b/w models") if ( @@ -2505,6 +2509,7 @@ class Router: await self._async_get_cooldown_deployments_with_debug_info(), ) ) + fallback_failure_exception_str = str(new_exception) if hasattr(original_exception, "message"): # add the available fallbacks to the exception @@ -2512,6 +2517,13 @@ class Router: model_group, fallback_model_group, ) + if len(fallback_failure_exception_str) > 0: + original_exception.message += ( + "\nError doing the fallback: {}".format( + fallback_failure_exception_str + ) + ) + raise original_exception async def async_function_with_retries(self, *args, **kwargs): @@ -3294,10 +3306,14 @@ class Router: value=cached_value, key=cooldown_key, ttl=cooldown_time ) - self.send_deployment_cooldown_alert( - deployment_id=deployment, - exception_status=exception_status, - cooldown_time=cooldown_time, + # Trigger cooldown handler + asyncio.create_task( + router_cooldown_handler( + litellm_router_instance=self, + deployment_id=deployment, + exception_status=exception_status, + cooldown_time=cooldown_time, + ) ) else: self.failed_calls.set_cache( @@ -4948,42 +4964,6 @@ class Router: ) print("\033[94m\nInitialized Alerting for litellm.Router\033[0m\n") # noqa - def send_deployment_cooldown_alert( - self, - deployment_id: str, - exception_status: Union[str, int], - cooldown_time: float, - ): - try: - from litellm.proxy.proxy_server import proxy_logging_obj - - # trigger slack alert saying deployment is in cooldown - if ( - proxy_logging_obj is not None - and proxy_logging_obj.alerting is not None - and "slack" in proxy_logging_obj.alerting - ): - _deployment = self.get_deployment(model_id=deployment_id) - if _deployment is None: - return - - 
_litellm_params = _deployment["litellm_params"] - temp_litellm_params = copy.deepcopy(_litellm_params) - temp_litellm_params = dict(temp_litellm_params) - _model_name = _deployment.get("model_name", None) - _api_base = litellm.get_api_base( - model=_model_name, optional_params=temp_litellm_params - ) - # asyncio.create_task( - # proxy_logging_obj.slack_alerting_instance.send_alert( - # message=f"Router: Cooling down Deployment:\nModel Name: `{_model_name}`\nAPI Base: `{_api_base}`\nCooldown Time: `{cooldown_time} seconds`\nException Status Code: `{str(exception_status)}`\n\nChange 'cooldown_time' + 'allowed_fails' under 'Router Settings' on proxy UI, or via config - https://docs.litellm.ai/docs/proxy/reliability#fallbacks--retries--timeouts--cooldowns", - # alert_type="cooldown_deployment", - # level="Low", - # ) - # ) - except Exception as e: - pass - def set_custom_routing_strategy( self, CustomRoutingStrategy: CustomRoutingStrategyBase ): diff --git a/litellm/router_utils/cooldown_callbacks.py b/litellm/router_utils/cooldown_callbacks.py new file mode 100644 index 0000000000..3a5213ec03 --- /dev/null +++ b/litellm/router_utils/cooldown_callbacks.py @@ -0,0 +1,51 @@ +""" +Callbacks triggered on cooling down deployments +""" + +import copy +from typing import TYPE_CHECKING, Any, Union + +import litellm +from litellm._logging import verbose_logger + +if TYPE_CHECKING: + from litellm.router import Router as _Router + + LitellmRouter = _Router +else: + LitellmRouter = Any + + +async def router_cooldown_handler( + litellm_router_instance: LitellmRouter, + deployment_id: str, + exception_status: Union[str, int], + cooldown_time: float, +): + _deployment = litellm_router_instance.get_deployment(model_id=deployment_id) + if _deployment is None: + verbose_logger.warning( + f"in router_cooldown_handler but _deployment is None for deployment_id={deployment_id}. 
Doing nothing" + ) + return + _litellm_params = _deployment["litellm_params"] + temp_litellm_params = copy.deepcopy(_litellm_params) + temp_litellm_params = dict(temp_litellm_params) + _model_name = _deployment.get("model_name", None) + _api_base = litellm.get_api_base( + model=_model_name, optional_params=temp_litellm_params + ) + model_info = _deployment["model_info"] + model_id = model_info.id + + # Trigger cooldown on Prometheus + from litellm.litellm_core_utils.litellm_logging import prometheusLogger + + if prometheusLogger is not None: + prometheusLogger.set_deployment_complete_outage( + litellm_model_name=_model_name, + model_id=model_id, + api_base="", + llm_provider="", + ) + pass diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py index 94b8b02c1c..fe644b08c3 100644 --- a/litellm/tests/test_completion.py +++ b/litellm/tests/test_completion.py @@ -892,47 +892,51 @@ def test_completion_claude_3_base64(): "model", ["gemini/gemini-1.5-flash"] # "claude-3-sonnet-20240229", ) def test_completion_function_plus_image(model): - litellm.set_verbose = True + try: + litellm.set_verbose = True - image_content = [ - {"type": "text", "text": "What’s in this image?"}, - { - "type": "image_url", - "image_url": { - "url": "https://litellm-listing.s3.amazonaws.com/litellm_logo.png" - }, - }, - ] - image_message = {"role": "user", "content": image_content} - - tools = [ - { - "type": "function", - "function": { - "name": "get_current_weather", - "description": "Get the current weather in a given location", - "parameters": { - "type": "object", - "properties": { - "location": { - "type": "string", - "description": "The city and state, e.g. 
San Francisco, CA", - }, - "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}, - }, - "required": ["location"], + image_content = [ + {"type": "text", "text": "What’s in this image?"}, + { + "type": "image_url", + "image_url": { + "url": "https://litellm-listing.s3.amazonaws.com/litellm_logo.png" }, }, - } - ] + ] + image_message = {"role": "user", "content": image_content} - tool_choice = {"type": "function", "function": {"name": "get_current_weather"}} - messages = [ - { - "role": "user", - "content": "What's the weather like in Boston today in Fahrenheit?", - } - ] + tools = [ + { + "type": "function", + "function": { + "name": "get_current_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. San Francisco, CA", + }, + "unit": { + "type": "string", + "enum": ["celsius", "fahrenheit"], + }, + }, + "required": ["location"], + }, + }, + } + ] + + tool_choice = {"type": "function", "function": {"name": "get_current_weather"}} + messages = [ + { + "role": "user", + "content": "What's the weather like in Boston today in Fahrenheit?", + } + ] try: response = completion( @@ -4088,9 +4092,28 @@ async def test_acompletion_gemini(): def test_completion_deepseek(): litellm.set_verbose = True model_name = "deepseek/deepseek-chat" - messages = [{"role": "user", "content": "Hey, how's it going?"}] + tools = [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get weather of an location, the user shoud supply a location first", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. 
San Francisco, CA", + } + }, + "required": ["location"], + }, + }, + }, + ] + messages = [{"role": "user", "content": "How's the weather in Hangzhou?"}] try: - response = completion(model=model_name, messages=messages) + response = completion(model=model_name, messages=messages, tools=tools) # Add any assertions here to check the response print(response) except litellm.APIError as e: diff --git a/litellm/utils.py b/litellm/utils.py index 20beb47dc2..e1a686eaf7 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -3536,22 +3536,11 @@ def get_optional_params( ) _check_valid_arg(supported_params=supported_params) - if frequency_penalty is not None: - optional_params["frequency_penalty"] = frequency_penalty - if max_tokens is not None: - optional_params["max_tokens"] = max_tokens - if presence_penalty is not None: - optional_params["presence_penalty"] = presence_penalty - if stop is not None: - optional_params["stop"] = stop - if stream is not None: - optional_params["stream"] = stream - if temperature is not None: - optional_params["temperature"] = temperature - if logprobs is not None: - optional_params["logprobs"] = logprobs - if top_logprobs is not None: - optional_params["top_logprobs"] = top_logprobs + optional_params = litellm.OpenAIConfig().map_openai_params( + non_default_params=non_default_params, + optional_params=optional_params, + model=model, + ) elif custom_llm_provider == "openrouter": supported_params = get_supported_openai_params( model=model, custom_llm_provider=custom_llm_provider @@ -4141,12 +4130,15 @@ def get_supported_openai_params( "frequency_penalty", "max_tokens", "presence_penalty", + "response_format", "stop", "stream", "temperature", "top_p", "logprobs", "top_logprobs", + "tools", + "tool_choice", ] elif custom_llm_provider == "cohere": return [