diff --git a/docs/my-website/docs/proxy/cost_tracking.md b/docs/my-website/docs/proxy/cost_tracking.md
index 033413099..7f90273c3 100644
--- a/docs/my-website/docs/proxy/cost_tracking.md
+++ b/docs/my-website/docs/proxy/cost_tracking.md
@@ -284,9 +284,7 @@ Output from script
 
 :::info
 
-Customer This is the value of `user_id` passed when calling [`/key/generate`](https://litellm-api.up.railway.app/#/key%20management/generate_key_fn_key_generate_post)
-
-[this is `user` passed to `/chat/completions` request](#how-to-track-spend-with-litellm)
+Customer [this is `user` passed to `/chat/completions` request](#how-to-track-spend-with-litellm)
 
 - [LiteLLM API key](virtual_keys.md)
 
diff --git a/litellm/caching/base_cache.py b/litellm/caching/base_cache.py
index 0699832ab..a50e09bf9 100644
--- a/litellm/caching/base_cache.py
+++ b/litellm/caching/base_cache.py
@@ -23,8 +23,12 @@ class BaseCache:
         self.default_ttl = default_ttl
 
     def get_ttl(self, **kwargs) -> Optional[int]:
-        if kwargs.get("ttl") is not None:
-            return kwargs.get("ttl")
+        kwargs_ttl: Optional[int] = kwargs.get("ttl")
+        if kwargs_ttl is not None:
+            try:
+                return int(kwargs_ttl)
+            except ValueError:
+                return self.default_ttl
         return self.default_ttl
 
     def set_cache(self, key, value, **kwargs):
diff --git a/litellm/caching/redis_cache.py b/litellm/caching/redis_cache.py
index e6c408cc8..042a083a4 100644
--- a/litellm/caching/redis_cache.py
+++ b/litellm/caching/redis_cache.py
@@ -301,6 +301,7 @@ class RedisCache(BaseCache):
         print_verbose(
             f"Set ASYNC Redis Cache: key: {key}\nValue {value}\nttl={ttl}"
         )
+
         try:
             if not hasattr(redis_client, "set"):
                 raise Exception(
diff --git a/litellm/integrations/prometheus.py b/litellm/integrations/prometheus.py
index 01c08ef04..cbeb4d336 100644
--- a/litellm/integrations/prometheus.py
+++ b/litellm/integrations/prometheus.py
@@ -849,9 +849,13 @@ class PrometheusLogger(CustomLogger):
     ):
         try:
             verbose_logger.debug("setting remaining tokens requests metric")
-            standard_logging_payload: StandardLoggingPayload = request_kwargs.get(
-                "standard_logging_object", {}
+            standard_logging_payload: Optional[StandardLoggingPayload] = (
+                request_kwargs.get("standard_logging_object")
             )
+
+            if standard_logging_payload is None:
+                return
+
             model_group = standard_logging_payload["model_group"]
             api_base = standard_logging_payload["api_base"]
             _response_headers = request_kwargs.get("response_headers")
@@ -862,22 +866,18 @@ class PrometheusLogger(CustomLogger):
             _model_info = _metadata.get("model_info") or {}
             model_id = _model_info.get("id", None)
 
-            remaining_requests = None
-            remaining_tokens = None
-            # OpenAI / OpenAI Compatible headers
-            if (
-                _response_headers
-                and "x-ratelimit-remaining-requests" in _response_headers
-            ):
-                remaining_requests = _response_headers["x-ratelimit-remaining-requests"]
-            if (
-                _response_headers
-                and "x-ratelimit-remaining-tokens" in _response_headers
-            ):
-                remaining_tokens = _response_headers["x-ratelimit-remaining-tokens"]
-            verbose_logger.debug(
-                f"remaining requests: {remaining_requests}, remaining tokens: {remaining_tokens}"
-            )
+            remaining_requests: Optional[int] = None
+            remaining_tokens: Optional[int] = None
+            if additional_headers := standard_logging_payload["hidden_params"][
+                "additional_headers"
+            ]:
+                # OpenAI / OpenAI Compatible headers
+                remaining_requests = additional_headers.get(
+                    "x_ratelimit_remaining_requests", None
+                )
+                remaining_tokens = additional_headers.get(
+                    "x_ratelimit_remaining_tokens", None
+                )
 
             if remaining_requests:
                 """
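For reference, here is the new Prometheus lookup in isolation. This is a minimal sketch: the payload dict below is a hypothetical stand-in for a populated StandardLoggingPayload and is not part of this diff.

from typing import Optional

# Hypothetical stand-in for standard_logging_payload (shape mirrors the diff above).
standard_logging_payload = {
    "model_group": "gpt-4o",
    "api_base": "https://api.openai.com/v1",
    "hidden_params": {
        "additional_headers": {
            "x_ratelimit_remaining_requests": 123,
            "x_ratelimit_remaining_tokens": 4321,
        }
    },
}

remaining_requests: Optional[int] = None
remaining_tokens: Optional[int] = None
# Rate-limit data is now read from the typed hidden_params, not from raw response headers.
if additional_headers := standard_logging_payload["hidden_params"]["additional_headers"]:
    remaining_requests = additional_headers.get("x_ratelimit_remaining_requests")
    remaining_tokens = additional_headers.get("x_ratelimit_remaining_tokens")

print(remaining_requests, remaining_tokens)  # 123 4321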
diff --git a/litellm/litellm_core_utils/core_helpers.py b/litellm/litellm_core_utils/core_helpers.py
index f5619d237..cddca61ee 100644
--- a/litellm/litellm_core_utils/core_helpers.py
+++ b/litellm/litellm_core_utils/core_helpers.py
@@ -80,7 +80,7 @@ def _get_parent_otel_span_from_kwargs(
 ) -> Union[Span, None]:
     try:
         if kwargs is None:
-            raise ValueError("kwargs is None")
+            return None
         litellm_params = kwargs.get("litellm_params")
         _metadata = kwargs.get("metadata") or {}
         if "litellm_parent_otel_span" in _metadata:
diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py
index 7f403e422..4753779c0 100644
--- a/litellm/litellm_core_utils/litellm_logging.py
+++ b/litellm/litellm_core_utils/litellm_logging.py
@@ -42,6 +42,7 @@ from litellm.types.utils import (
     ImageResponse,
     ModelResponse,
     StandardCallbackDynamicParams,
+    StandardLoggingAdditionalHeaders,
     StandardLoggingHiddenParams,
     StandardLoggingMetadata,
     StandardLoggingModelCostFailureDebugInformation,
@@ -2640,6 +2641,52 @@ class StandardLoggingPayloadSetup:
 
         return final_response_obj
 
+    @staticmethod
+    def get_additional_headers(
+        additiona_headers: Optional[dict],
+    ) -> Optional[StandardLoggingAdditionalHeaders]:
+
+        if additiona_headers is None:
+            return None
+
+        additional_logging_headers: StandardLoggingAdditionalHeaders = {}
+
+        for key in StandardLoggingAdditionalHeaders.__annotations__.keys():
+            _key = key.lower()
+            _key = _key.replace("_", "-")
+            if _key in additiona_headers:
+                try:
+                    additional_logging_headers[key] = int(additiona_headers[_key])  # type: ignore
+                except (ValueError, TypeError):
+                    verbose_logger.debug(
+                        f"Could not convert {additiona_headers[_key]} to int for key {key}."
+                    )
+        return additional_logging_headers
+
+    @staticmethod
+    def get_hidden_params(
+        hidden_params: Optional[dict],
+    ) -> StandardLoggingHiddenParams:
+        clean_hidden_params = StandardLoggingHiddenParams(
+            model_id=None,
+            cache_key=None,
+            api_base=None,
+            response_cost=None,
+            additional_headers=None,
+        )
+        if hidden_params is not None:
+            for key in StandardLoggingHiddenParams.__annotations__.keys():
+                if key in hidden_params:
+                    if key == "additional_headers":
+                        clean_hidden_params["additional_headers"] = (
+                            StandardLoggingPayloadSetup.get_additional_headers(
+                                hidden_params[key]
+                            )
+                        )
+                    else:
+                        clean_hidden_params[key] = hidden_params[key]  # type: ignore
+        return clean_hidden_params
+
 
 def get_standard_logging_object_payload(
     kwargs: Optional[dict],
@@ -2671,7 +2718,9 @@ def get_standard_logging_object_payload(
         if response_headers is not None:
             hidden_params = dict(
                 StandardLoggingHiddenParams(
-                    additional_headers=dict(response_headers),
+                    additional_headers=StandardLoggingPayloadSetup.get_additional_headers(
+                        dict(response_headers)
+                    ),
                     model_id=None,
                     cache_key=None,
                     api_base=None,
@@ -2712,21 +2761,9 @@ def get_standard_logging_object_payload(
             )
         )
         # clean up litellm hidden params
-        clean_hidden_params = StandardLoggingHiddenParams(
-            model_id=None,
-            cache_key=None,
-            api_base=None,
-            response_cost=None,
-            additional_headers=None,
+        clean_hidden_params = StandardLoggingPayloadSetup.get_hidden_params(
+            hidden_params
         )
-        if hidden_params is not None:
-            clean_hidden_params = StandardLoggingHiddenParams(
-                **{  # type: ignore
-                    key: hidden_params[key]
-                    for key in StandardLoggingHiddenParams.__annotations__.keys()
-                    if key in hidden_params
-                }
-            )
         # clean up litellm metadata
         clean_metadata = StandardLoggingPayloadSetup.get_standard_logging_metadata(
             metadata=metadata
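A hedged usage sketch of the two helpers added above, based only on the code in this diff. The input values are illustrative; the import path is the file being modified here.

from litellm.litellm_core_utils.litellm_logging import StandardLoggingPayloadSetup

raw_hidden_params = {
    "model_id": "model-123",
    "response_cost": "0.00042",
    "some_unknown_key": "dropped",  # not a StandardLoggingHiddenParams field, so it is ignored
    "additional_headers": {
        "x-ratelimit-remaining-requests": "1999",
        "llm_provider-cf-ray": "8da71bdbc9b57abb-SJC",  # not a tracked ratelimit header, ignored
    },
}

clean = StandardLoggingPayloadSetup.get_hidden_params(raw_hidden_params)
# Expected per the implementation above:
#   clean["additional_headers"] == {"x_ratelimit_remaining_requests": 1999}
#   clean["cache_key"] is None, and "some_unknown_key" is gone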
diff --git a/litellm/llms/vertex_ai_and_google_ai_studio/gemini/vertex_and_google_ai_studio_gemini.py b/litellm/llms/vertex_ai_and_google_ai_studio/gemini/vertex_and_google_ai_studio_gemini.py
index 914651f3d..0c2ccb7d8 100644
--- a/litellm/llms/vertex_ai_and_google_ai_studio/gemini/vertex_and_google_ai_studio_gemini.py
+++ b/litellm/llms/vertex_ai_and_google_ai_studio/gemini/vertex_and_google_ai_studio_gemini.py
@@ -431,9 +431,13 @@ class VertexGeminiConfig:
             elif openai_function_object is not None:
                 gtool_func_declaration = FunctionDeclaration(
                     name=openai_function_object["name"],
-                    description=openai_function_object.get("description", ""),
-                    parameters=openai_function_object.get("parameters", {}),
                 )
+                _description = openai_function_object.get("description", None)
+                _parameters = openai_function_object.get("parameters", None)
+                if _description is not None:
+                    gtool_func_declaration["description"] = _description
+                if _parameters is not None:
+                    gtool_func_declaration["parameters"] = _parameters
                 gtool_func_declarations.append(gtool_func_declaration)
             else:
                 # assume it's a provider-specific param
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index 5de5413ed..69a1119cc 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -13,7 +13,7 @@ model_list:
 
 litellm_settings:
   fallbacks: [{ "claude-3-5-sonnet-20240620": ["claude-3-5-sonnet-aihubmix"] }]
-  callbacks: ["otel"]
+  callbacks: ["otel", "prometheus"]
 
 router_settings:
   routing_strategy: latency-based-routing
diff --git a/litellm/router.py b/litellm/router.py
index cc8ad7434..e53c1a8a9 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -5255,6 +5255,7 @@ class Router:
                 parent_otel_span=parent_otel_span,
             )
             raise exception
+
         verbose_router_logger.info(
             f"get_available_deployment for model: {model}, Selected deployment: {self.print_deployment(deployment)} for model: {model}"
         )
diff --git a/litellm/router_utils/handle_error.py b/litellm/router_utils/handle_error.py
index 321ba5dc5..2a15fcce0 100644
--- a/litellm/router_utils/handle_error.py
+++ b/litellm/router_utils/handle_error.py
@@ -64,6 +64,7 @@ async def send_llm_exception_alert(
         )
 
 
+
 async def async_raise_no_deployment_exception(
     litellm_router_instance: LitellmRouter, model: str, parent_otel_span: Optional[Span]
 ):
@@ -73,6 +74,7 @@ async def async_raise_no_deployment_exception(
     verbose_router_logger.info(
         f"get_available_deployment for model: {model}, No deployment available"
     )
+
     model_ids = litellm_router_instance.get_model_ids(model_name=model)
     _cooldown_time = litellm_router_instance.cooldown_cache.get_min_cooldown(
         model_ids=model_ids, parent_otel_span=parent_otel_span
diff --git a/litellm/types/utils.py b/litellm/types/utils.py
index 0b7a29c91..6658eb330 100644
--- a/litellm/types/utils.py
+++ b/litellm/types/utils.py
@@ -1433,12 +1433,19 @@ class StandardLoggingMetadata(StandardLoggingUserAPIKeyMetadata):
     requester_metadata: Optional[dict]
 
 
+class StandardLoggingAdditionalHeaders(TypedDict, total=False):
+    x_ratelimit_limit_requests: int
+    x_ratelimit_limit_tokens: int
+    x_ratelimit_remaining_requests: int
+    x_ratelimit_remaining_tokens: int
+
+
 class StandardLoggingHiddenParams(TypedDict):
     model_id: Optional[str]
     cache_key: Optional[str]
     api_base: Optional[str]
     response_cost: Optional[str]
-    additional_headers: Optional[dict]
+    additional_headers: Optional[StandardLoggingAdditionalHeaders]
 
 
 class StandardLoggingModelInformation(TypedDict):
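A short sketch of how the new typed hidden params can be constructed once this diff lands; the field values are illustrative only.

from litellm.types.utils import (
    StandardLoggingAdditionalHeaders,
    StandardLoggingHiddenParams,
)

# total=False on the headers TypedDict, so the *_limit_* keys may be omitted.
headers: StandardLoggingAdditionalHeaders = {
    "x_ratelimit_remaining_requests": 1999,
    "x_ratelimit_remaining_tokens": 160000,
}

hidden_params = StandardLoggingHiddenParams(
    model_id="model-123",
    cache_key=None,
    api_base="https://api.openai.com/v1",
    response_cost="0.00042",
    additional_headers=headers,
)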
diff --git a/tests/llm_translation/test_optional_params.py b/tests/llm_translation/test_optional_params.py
index fdda7b171..163e7d919 100644
--- a/tests/llm_translation/test_optional_params.py
+++ b/tests/llm_translation/test_optional_params.py
@@ -786,6 +786,7 @@ def test_unmapped_vertex_anthropic_model():
     assert "max_retries" not in optional_params
 
 
+
 @pytest.mark.parametrize("provider", ["anthropic", "vertex_ai"])
 def test_anthropic_parallel_tool_calls(provider):
     optional_params = get_optional_params(
diff --git a/tests/llm_translation/test_vertex.py b/tests/llm_translation/test_vertex.py
index 4a9ef829d..8bd1ddf32 100644
--- a/tests/llm_translation/test_vertex.py
+++ b/tests/llm_translation/test_vertex.py
@@ -12,8 +12,9 @@ from unittest.mock import AsyncMock, MagicMock, patch
 sys.path.insert(
     0, os.path.abspath("../..")
 )  # Adds the parent directory to the system path
-
+import pytest
 import litellm
+from litellm import get_optional_params
 
 
 def test_completion_pydantic_obj_2():
@@ -117,3 +118,63 @@ def test_build_vertex_schema():
     assert new_schema["type"] == schema["type"]
     assert new_schema["properties"] == schema["properties"]
     assert "required" in new_schema and new_schema["required"] == schema["required"]
+
+
+@pytest.mark.parametrize(
+    "tools, key",
+    [
+        ([{"googleSearchRetrieval": {}}], "googleSearchRetrieval"),
+        ([{"code_execution": {}}], "code_execution"),
+    ],
+)
+def test_vertex_tool_params(tools, key):
+
+    optional_params = get_optional_params(
+        model="gemini-1.5-pro",
+        custom_llm_provider="vertex_ai",
+        tools=tools,
+    )
+    print(optional_params)
+    assert optional_params["tools"][0][key] == {}
+
+
+@pytest.mark.parametrize(
+    "tool, expect_parameters",
+    [
+        (
+            {
+                "name": "test_function",
+                "description": "test_function_description",
+                "parameters": {
+                    "type": "object",
+                    "properties": {"test_param": {"type": "string"}},
+                },
+            },
+            True,
+        ),
+        (
+            {
+                "name": "test_function",
+            },
+            False,
+        ),
+    ],
+)
+def test_vertex_function_translation(tool, expect_parameters):
+    """
+    If param not set, don't set it in the request
+    """
+
+    tools = [tool]
+    optional_params = get_optional_params(
+        model="gemini-1.5-pro",
+        custom_llm_provider="vertex_ai",
+        tools=tools,
+    )
+    print(optional_params)
+    if expect_parameters:
+        assert "parameters" in optional_params["tools"][0]["function_declarations"][0]
+    else:
+        assert (
+            "parameters" not in optional_params["tools"][0]["function_declarations"][0]
+        )
diff --git a/tests/local_testing/test_caching.py b/tests/local_testing/test_caching.py
index 3456f4535..1116840b5 100644
--- a/tests/local_testing/test_caching.py
+++ b/tests/local_testing/test_caching.py
@@ -609,7 +609,7 @@ async def test_embedding_caching_redis_ttl():
         type="redis",
         host="dummy_host",
         password="dummy_password",
-        default_in_redis_ttl=2.5,
+        default_in_redis_ttl=2,
     )
 
     inputs = [
@@ -635,7 +635,7 @@ async def test_embedding_caching_redis_ttl():
     print(f"redis pipeline set args: {args}")
    print(f"redis pipeline set kwargs: {kwargs}")
     assert kwargs.get("ex") == datetime.timedelta(
-        seconds=2.5
+        seconds=2
     )  # Check if TTL is set to 2 seconds
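To make the TTL handling explicit, here is a standalone re-implementation of the get_ttl() logic added in litellm/caching/base_cache.py earlier in this diff (illustrative only, not the library API). It also explains why the Redis TTL test above now uses an integer TTL of 2 rather than 2.5.

from typing import Optional

def _resolve_ttl(default_ttl: Optional[int], **kwargs) -> Optional[int]:
    # Mirrors BaseCache.get_ttl from the diff: coerce the kwarg to int, fall back on failure.
    kwargs_ttl = kwargs.get("ttl")
    if kwargs_ttl is not None:
        try:
            return int(kwargs_ttl)
        except ValueError:
            return default_ttl
    return default_ttl

print(_resolve_ttl(60, ttl=2.5))    # 2   -- float TTLs are truncated to int
print(_resolve_ttl(60, ttl="120"))  # 120 -- numeric strings are accepted
print(_resolve_ttl(60, ttl="abc"))  # 60  -- unparseable values fall back to the default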
model="gemini/gemini-1.5-pro-002", + messages=[ + { + "content": [ + { + "type": "text", + "text": "You are a helpful assistant that can interact with a computer to solve tasks.\n\n* If user provides a path, you should NOT assume it's relative to the current working directory. Instead, you should explore the file system to find the file before working on it.\n\n", + } + ], + "role": "system", + }, + { + "content": [{"type": "text", "text": "Hey, how's it going?"}], + "role": "user", + }, + ], + tools=[ + { + "type": "function", + "function": { + "name": "finish", + "description": "Finish the interaction when the task is complete OR if the assistant cannot proceed further with the task.", + }, + }, + ], + ) diff --git a/tests/local_testing/test_router_timeout.py b/tests/local_testing/test_router_timeout.py index ccba7f676..c13bc2deb 100644 --- a/tests/local_testing/test_router_timeout.py +++ b/tests/local_testing/test_router_timeout.py @@ -13,7 +13,7 @@ sys.path.insert( 0, os.path.abspath("../..") ) # Adds the parent directory to the system path - +from unittest.mock import patch, MagicMock, AsyncMock import os from dotenv import load_dotenv @@ -139,3 +139,51 @@ async def test_router_timeouts_bedrock(): pytest.fail( f"Did not raise error `openai.APITimeoutError`. Instead raised error type: {type(e)}, Error: {e}" ) + + +@pytest.mark.parametrize( + "num_retries, expected_call_count", + [(0, 1), (1, 2), (2, 3), (3, 4)], +) +def test_router_timeout_with_retries_anthropic_model(num_retries, expected_call_count): + """ + If request hits custom timeout, ensure it's retried. + """ + litellm._turn_on_debug() + from litellm.llms.custom_httpx.http_handler import HTTPHandler + import time + + litellm.num_retries = num_retries + litellm.request_timeout = 0.000001 + + router = Router( + model_list=[ + { + "model_name": "claude-3-haiku", + "litellm_params": { + "model": "anthropic/claude-3-haiku-20240307", + }, + } + ], + ) + + custom_client = HTTPHandler() + + with patch.object(custom_client, "post", new=MagicMock()) as mock_client: + try: + + def delayed_response(*args, **kwargs): + time.sleep(0.01) # Exceeds the 0.000001 timeout + raise TimeoutError("Request timed out.") + + mock_client.side_effect = delayed_response + + router.completion( + model="claude-3-haiku", + messages=[{"role": "user", "content": "hello, who are u"}], + client=custom_client, + ) + except litellm.Timeout: + pass + + assert mock_client.call_count == expected_call_count diff --git a/tests/logging_callback_tests/test_prometheus_unit_tests.py b/tests/logging_callback_tests/test_prometheus_unit_tests.py index a2c49b35a..494f83a65 100644 --- a/tests/logging_callback_tests/test_prometheus_unit_tests.py +++ b/tests/logging_callback_tests/test_prometheus_unit_tests.py @@ -549,13 +549,14 @@ def test_set_llm_deployment_success_metrics(prometheus_logger): standard_logging_payload = create_standard_logging_payload() + standard_logging_payload["hidden_params"]["additional_headers"] = { + "x_ratelimit_remaining_requests": 123, + "x_ratelimit_remaining_tokens": 4321, + } + # Create test data request_kwargs = { "model": "gpt-3.5-turbo", - "response_headers": { - "x-ratelimit-remaining-requests": 123, - "x-ratelimit-remaining-tokens": 4321, - }, "litellm_params": { "custom_llm_provider": "openai", "metadata": {"model_info": {"id": "model-123"}}, diff --git a/tests/logging_callback_tests/test_standard_logging_payload.py b/tests/logging_callback_tests/test_standard_logging_payload.py index f6599a005..42d504a1e 100644 --- 
diff --git a/tests/logging_callback_tests/test_prometheus_unit_tests.py b/tests/logging_callback_tests/test_prometheus_unit_tests.py
index a2c49b35a..494f83a65 100644
--- a/tests/logging_callback_tests/test_prometheus_unit_tests.py
+++ b/tests/logging_callback_tests/test_prometheus_unit_tests.py
@@ -549,13 +549,14 @@ def test_set_llm_deployment_success_metrics(prometheus_logger):
 
     standard_logging_payload = create_standard_logging_payload()
 
+    standard_logging_payload["hidden_params"]["additional_headers"] = {
+        "x_ratelimit_remaining_requests": 123,
+        "x_ratelimit_remaining_tokens": 4321,
+    }
+
     # Create test data
     request_kwargs = {
         "model": "gpt-3.5-turbo",
-        "response_headers": {
-            "x-ratelimit-remaining-requests": 123,
-            "x-ratelimit-remaining-tokens": 4321,
-        },
         "litellm_params": {
             "custom_llm_provider": "openai",
             "metadata": {"model_info": {"id": "model-123"}},
diff --git a/tests/logging_callback_tests/test_standard_logging_payload.py b/tests/logging_callback_tests/test_standard_logging_payload.py
index f6599a005..42d504a1e 100644
--- a/tests/logging_callback_tests/test_standard_logging_payload.py
+++ b/tests/logging_callback_tests/test_standard_logging_payload.py
@@ -65,3 +65,42 @@ def test_get_usage(response_obj, expected_values):
     assert usage.prompt_tokens == expected_values[0]
     assert usage.completion_tokens == expected_values[1]
     assert usage.total_tokens == expected_values[2]
+
+
+def test_get_additional_headers():
+    additional_headers = {
+        "x-ratelimit-limit-requests": "2000",
+        "x-ratelimit-remaining-requests": "1999",
+        "x-ratelimit-limit-tokens": "160000",
+        "x-ratelimit-remaining-tokens": "160000",
+        "llm_provider-date": "Tue, 29 Oct 2024 23:57:37 GMT",
+        "llm_provider-content-type": "application/json",
+        "llm_provider-transfer-encoding": "chunked",
+        "llm_provider-connection": "keep-alive",
+        "llm_provider-anthropic-ratelimit-requests-limit": "2000",
+        "llm_provider-anthropic-ratelimit-requests-remaining": "1999",
+        "llm_provider-anthropic-ratelimit-requests-reset": "2024-10-29T23:57:40Z",
+        "llm_provider-anthropic-ratelimit-tokens-limit": "160000",
+        "llm_provider-anthropic-ratelimit-tokens-remaining": "160000",
+        "llm_provider-anthropic-ratelimit-tokens-reset": "2024-10-29T23:57:36Z",
+        "llm_provider-request-id": "req_01F6CycZZPSHKRCCctcS1Vto",
+        "llm_provider-via": "1.1 google",
+        "llm_provider-cf-cache-status": "DYNAMIC",
+        "llm_provider-x-robots-tag": "none",
+        "llm_provider-server": "cloudflare",
+        "llm_provider-cf-ray": "8da71bdbc9b57abb-SJC",
+        "llm_provider-content-encoding": "gzip",
+        "llm_provider-x-ratelimit-limit-requests": "2000",
+        "llm_provider-x-ratelimit-remaining-requests": "1999",
+        "llm_provider-x-ratelimit-limit-tokens": "160000",
+        "llm_provider-x-ratelimit-remaining-tokens": "160000",
+    }
+    additional_logging_headers = StandardLoggingPayloadSetup.get_additional_headers(
+        additional_headers
+    )
+    assert additional_logging_headers == {
+        "x_ratelimit_limit_requests": 2000,
+        "x_ratelimit_remaining_requests": 1999,
+        "x_ratelimit_limit_tokens": 160000,
+        "x_ratelimit_remaining_tokens": 160000,
+    }
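A trimmed-down call mirroring the new unit test above, to spell out the normalization rules: hyphenated HTTP header names become underscored TypedDict keys, string values are coerced to int, and untracked or provider-prefixed headers are dropped. This assumes StandardLoggingPayloadSetup is importable from the module modified earlier in this diff.

from litellm.litellm_core_utils.litellm_logging import StandardLoggingPayloadSetup

normalized = StandardLoggingPayloadSetup.get_additional_headers(
    {
        "x-ratelimit-remaining-requests": "1999",
        "x-ratelimit-remaining-tokens": "160000",
        "llm_provider-x-ratelimit-remaining-requests": "1999",  # prefixed duplicate, ignored
        "retry-after": "1",  # not a tracked key, ignored
    }
)
assert normalized == {
    "x_ratelimit_remaining_requests": 1999,
    "x_ratelimit_remaining_tokens": 160000,
}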