forked from phoenix/litellm-mirror

Litellm dev 10 26 2024 (#6472)

* docs(exception_mapping.md): add missing exception types

  Fixes https://github.com/Aider-AI/aider/issues/2120#issuecomment-2438971183

* fix(main.py): register custom model pricing with specific key

  Ensure custom model pricing is registered to the specific model+provider key combination

* test: make testing more robust for custom pricing

* fix(redis_cache.py): instrument otel logging for sync redis calls

  Ensures complete coverage for all redis cache calls

This commit is contained in:
parent f44ab00de2
commit 70111a7abd

9 changed files with 310 additions and 72 deletions
docs/my-website/docs/exception_mapping.md

```diff
@@ -2,18 +2,33 @@
 LiteLLM maps exceptions across all providers to their OpenAI counterparts.
 
-| Status Code | Error Type |
-|-------------|--------------------------|
-| 400 | BadRequestError |
-| 401 | AuthenticationError |
-| 403 | PermissionDeniedError |
-| 404 | NotFoundError |
-| 422 | UnprocessableEntityError |
-| 429 | RateLimitError |
-| >=500 | InternalServerError |
-| N/A | ContextWindowExceededError |
-| 400 | ContentPolicyViolationError |
-| 500 | APIConnectionError |
+All exceptions can be imported from `litellm` - e.g. `from litellm import BadRequestError`
+
+## LiteLLM Exceptions
+
+| Status Code | Error Type | Inherits from | Description |
+|-------------|------------|---------------|-------------|
+| 400 | BadRequestError | openai.BadRequestError | |
+| 400 | UnsupportedParamsError | litellm.BadRequestError | Raised when unsupported params are passed |
+| 400 | ContextWindowExceededError | litellm.BadRequestError | Special error type for context window exceeded error messages - enables context window fallbacks |
+| 400 | ContentPolicyViolationError | litellm.BadRequestError | Special error type for content policy violation error messages - enables content policy fallbacks |
+| 400 | InvalidRequestError | openai.BadRequestError | Deprecated - use BadRequestError instead |
+| 401 | AuthenticationError | openai.AuthenticationError | |
+| 403 | PermissionDeniedError | openai.PermissionDeniedError | |
+| 404 | NotFoundError | openai.NotFoundError | Raised when an invalid model is passed, e.g. gpt-8 |
+| 408 | Timeout | openai.APITimeoutError | Raised when a request times out |
+| 422 | UnprocessableEntityError | openai.UnprocessableEntityError | |
+| 429 | RateLimitError | openai.RateLimitError | |
+| 500 | APIConnectionError | openai.APIConnectionError | If any unmapped error is returned, this error is raised |
+| 500 | APIError | openai.APIError | Generic 500-status code error |
+| 503 | ServiceUnavailableError | openai.APIStatusError | Raised when the provider returns a service unavailable error |
+| >=500 | InternalServerError | openai.InternalServerError | Raised for any unmapped 500-status code error |
+| N/A | APIResponseValidationError | openai.APIResponseValidationError | Raised when Rules are used and a request/response fails a rule |
+| N/A | BudgetExceededError | Exception | Raised by the proxy when a budget is exceeded |
+| N/A | JSONSchemaValidationError | litellm.APIResponseValidationError | Raised when a response does not match the expected JSON schema - used if the `response_schema` param is passed with `enforce_validation=True` |
+| N/A | MockException | Exception | Internal exception raised by the mock_completion class. Do not use directly |
+| N/A | OpenAIError | openai.OpenAIError | Deprecated internal exception; inherits from openai.OpenAIError |
 
 Base case we return APIConnectionError
```
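To make the documented table concrete, here is a minimal sketch of catching these mapped exceptions. The model name and prompt are placeholders; the point is only the import path and the handler ordering (catch `ContextWindowExceededError` before its parent `BadRequestError`):

```python
import litellm
from litellm import BadRequestError, ContextWindowExceededError, RateLimitError

try:
    response = litellm.completion(
        model="gpt-3.5-turbo",  # placeholder model
        messages=[{"role": "user", "content": "Hello"}],
    )
except ContextWindowExceededError:
    # Subclass of litellm.BadRequestError - routers use this
    # signal for context window fallbacks, per the table above.
    print("prompt too long - retry with a larger-context model")
except RateLimitError:
    # Mapped from any provider's 429 response.
    print("rate limited - back off and retry")
except BadRequestError as e:
    # Any other 400-status error.
    print(f"bad request: {e}")
```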
```diff
@@ -1,3 +1,4 @@
+import asyncio
 from datetime import datetime, timedelta
 from typing import TYPE_CHECKING, Any, Optional, Union
 
@@ -32,14 +33,63 @@ class ServiceLogging(CustomLogger):
         self.prometheusServicesLogger = PrometheusServicesLogger()
 
     def service_success_hook(
-        self, service: ServiceTypes, duration: float, call_type: str
+        self,
+        service: ServiceTypes,
+        duration: float,
+        call_type: str,
+        parent_otel_span: Optional[Span] = None,
+        start_time: Optional[Union[datetime, float]] = None,
+        end_time: Optional[Union[float, datetime]] = None,
     ):
         """
-        [TODO] Not implemented for sync calls yet. V0 is focused on async monitoring (used by proxy).
+        Handles both sync and async monitoring by checking for an existing event loop.
         """
+        # if service == ServiceTypes.REDIS:
+        #     print(f"SYNC service: {service}, call_type: {call_type}")
         if self.mock_testing:
             self.mock_testing_sync_success_hook += 1
 
+        try:
+            # Try to get the current event loop
+            loop = asyncio.get_event_loop()
+            # Check if the loop is running
+            if loop.is_running():
+                # If we're in a running loop, create a task
+                loop.create_task(
+                    self.async_service_success_hook(
+                        service=service,
+                        duration=duration,
+                        call_type=call_type,
+                        parent_otel_span=parent_otel_span,
+                        start_time=start_time,
+                        end_time=end_time,
+                    )
+                )
+            else:
+                # Loop exists but is not running - we can use run_until_complete
+                loop.run_until_complete(
+                    self.async_service_success_hook(
+                        service=service,
+                        duration=duration,
+                        call_type=call_type,
+                        parent_otel_span=parent_otel_span,
+                        start_time=start_time,
+                        end_time=end_time,
+                    )
+                )
+        except RuntimeError:
+            # No event loop exists - create a new one and run
+            asyncio.run(
+                self.async_service_success_hook(
+                    service=service,
+                    duration=duration,
+                    call_type=call_type,
+                    parent_otel_span=parent_otel_span,
+                    start_time=start_time,
+                    end_time=end_time,
+                )
+            )
+
     def service_failure_hook(
         self, service: ServiceTypes, duration: float, error: Exception, call_type: str
     ):

@@ -62,6 +112,8 @@ class ServiceLogging(CustomLogger):
         """
         - For counting if the redis, postgres call is successful
         """
+        # if service == ServiceTypes.REDIS:
+        #     print(f"service: {service}, call_type: {call_type}")
         if self.mock_testing:
             self.mock_testing_async_success_hook += 1
```
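The new `service_success_hook` bridges sync call sites to the async hook by probing for an event loop. Here is the same pattern in isolation, a sketch with illustrative names (only the `asyncio` calls are real APIs; `async_hook` stands in for `async_service_success_hook`):

```python
import asyncio

async def async_hook(msg: str) -> None:
    # Stand-in for async_service_success_hook.
    print(f"logged: {msg}")

def sync_hook(msg: str) -> None:
    # Mirror of the diff's logic: schedule on a running loop,
    # drive an idle loop to completion, or create a fresh one.
    try:
        loop = asyncio.get_event_loop()
        if loop.is_running():
            loop.create_task(async_hook(msg))  # fire-and-forget
        else:
            loop.run_until_complete(async_hook(msg))
    except RuntimeError:
        # No event loop in this thread yet - create one.
        asyncio.run(async_hook(msg))

sync_hook("redis set_cache succeeded")
```

Note the `loop.create_task` branch makes the hook fire-and-forget inside a running loop: the sync caller does not block on the logging export.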
redis_cache.py

```diff
@@ -143,7 +143,17 @@ class RedisCache(BaseCache):
         )
         key = self.check_and_fix_namespace(key=key)
         try:
+            start_time = time.time()
             self.redis_client.set(name=key, value=str(value), ex=ttl)
+            end_time = time.time()
+            _duration = end_time - start_time
+            self.service_logger_obj.service_success_hook(
+                service=ServiceTypes.REDIS,
+                duration=_duration,
+                call_type="set_cache",
+                start_time=start_time,
+                end_time=end_time,
+            )
         except Exception as e:
             # NON blocking - notify users Redis is throwing an exception
             print_verbose(

@@ -157,14 +167,44 @@ class RedisCache(BaseCache):
         start_time = time.time()
         set_ttl = self.get_ttl(ttl=ttl)
         try:
+            start_time = time.time()
             result: int = _redis_client.incr(name=key, amount=value)  # type: ignore
+            end_time = time.time()
+            _duration = end_time - start_time
+            self.service_logger_obj.service_success_hook(
+                service=ServiceTypes.REDIS,
+                duration=_duration,
+                call_type="increment_cache",
+                start_time=start_time,
+                end_time=end_time,
+            )
+
             if set_ttl is not None:
                 # check if key already has ttl, if not -> set ttl
+                start_time = time.time()
                 current_ttl = _redis_client.ttl(key)
+                end_time = time.time()
+                _duration = end_time - start_time
+                self.service_logger_obj.service_success_hook(
+                    service=ServiceTypes.REDIS,
+                    duration=_duration,
+                    call_type="increment_cache_ttl",
+                    start_time=start_time,
+                    end_time=end_time,
+                )
                 if current_ttl == -1:
                     # Key has no expiration
+                    start_time = time.time()
                     _redis_client.expire(key, set_ttl)  # type: ignore
+                    end_time = time.time()
+                    _duration = end_time - start_time
+                    self.service_logger_obj.service_success_hook(
+                        service=ServiceTypes.REDIS,
+                        duration=_duration,
+                        call_type="increment_cache_expire",
+                        start_time=start_time,
+                        end_time=end_time,
+                    )
             return result
         except Exception as e:
             ## LOGGING ##

@@ -565,7 +605,17 @@ class RedisCache(BaseCache):
         try:
             key = self.check_and_fix_namespace(key=key)
             print_verbose(f"Get Redis Cache: key: {key}")
+            start_time = time.time()
             cached_response = self.redis_client.get(key)
+            end_time = time.time()
+            _duration = end_time - start_time
+            self.service_logger_obj.service_success_hook(
+                service=ServiceTypes.REDIS,
+                duration=_duration,
+                call_type="get_cache",
+                start_time=start_time,
+                end_time=end_time,
+            )
             print_verbose(
                 f"Got Redis Cache: key: {key}, cached_response {cached_response}"
             )

@@ -586,7 +636,17 @@ class RedisCache(BaseCache):
         for cache_key in key_list:
             cache_key = self.check_and_fix_namespace(key=cache_key)
             _keys.append(cache_key)
+        start_time = time.time()
         results: List = self.redis_client.mget(keys=_keys)  # type: ignore
+        end_time = time.time()
+        _duration = end_time - start_time
+        self.service_logger_obj.service_success_hook(
+            service=ServiceTypes.REDIS,
+            duration=_duration,
+            call_type="batch_get_cache",
+            start_time=start_time,
+            end_time=end_time,
+        )
 
         # Associate the results back with their keys.
         # 'results' is a list of values corresponding to the order of keys in 'key_list'.

@@ -725,6 +785,8 @@ class RedisCache(BaseCache):
                 service=ServiceTypes.REDIS,
                 duration=_duration,
                 call_type="sync_ping",
+                start_time=start_time,
+                end_time=end_time,
             )
             return response
         except Exception as e:
```
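Every instrumented call above repeats the same time / call / report shape. A generic sketch of that pattern, where `report` stands in for `service_logger_obj.service_success_hook` (the decorator itself is hypothetical, not part of LiteLLM):

```python
import time
from functools import wraps
from typing import Any, Callable

def timed_call(report: Callable[..., None], call_type: str):
    """Wrap a function with the time -> call -> report pattern from the diff."""
    def decorator(fn: Callable[..., Any]) -> Callable[..., Any]:
        @wraps(fn)
        def wrapper(*args: Any, **kwargs: Any) -> Any:
            start_time = time.time()
            result = fn(*args, **kwargs)  # e.g. redis_client.get(key)
            end_time = time.time()
            report(
                duration=end_time - start_time,
                call_type=call_type,
                start_time=start_time,
                end_time=end_time,
            )
            return result
        return wrapper
    return decorator

# Usage sketch:
report = lambda **kwargs: print(kwargs)
cached_get = timed_call(report, call_type="get_cache")(lambda key: f"value:{key}")
cached_get("my-key")
```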
```diff
@@ -661,13 +661,7 @@ class APIResponseValidationError(openai.APIResponseValidationError):  # type: ignore
         return _message
 
 
-class OpenAIError(openai.OpenAIError):  # type: ignore
-    def __init__(self, original_exception=None):
-        super().__init__()
-        self.llm_provider = "openai"
-
-
-class JSONSchemaValidationError(APIError):
+class JSONSchemaValidationError(APIResponseValidationError):
     def __init__(
         self, model: str, llm_provider: str, raw_response: str, schema: str
     ) -> None:

@@ -678,9 +672,13 @@ class JSONSchemaValidationError(APIError):
             model, raw_response, schema
         )
         self.message = message
-        super().__init__(
-            model=model, message=message, llm_provider=llm_provider, status_code=500
-        )
+        super().__init__(model=model, message=message, llm_provider=llm_provider)
+
+
+class OpenAIError(openai.OpenAIError):  # type: ignore
+    def __init__(self, original_exception=None):
+        super().__init__()
+        self.llm_provider = "openai"
 
 
 class UnsupportedParamsError(BadRequestError):
```
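Re-parenting `JSONSchemaValidationError` (from `APIError` to `APIResponseValidationError`, with `OpenAIError` simply relocated below it) means handlers written for the parent class keep catching it. A sketch, assuming the `response_schema` / `enforce_validation` parameters referenced in the exceptions table above; the model name and schema are placeholders:

```python
import litellm
from litellm import APIResponseValidationError, JSONSchemaValidationError

schema = {"type": "object", "properties": {"name": {"type": "string"}}}

try:
    response = litellm.completion(
        model="gemini/gemini-1.5-pro",  # placeholder
        messages=[{"role": "user", "content": "Return JSON with a 'name' field"}],
        response_format={
            "type": "json_object",
            "response_schema": schema,
            "enforce_validation": True,
        },
    )
except JSONSchemaValidationError as e:
    # Specific: the model answered, but the answer failed schema validation.
    print(f"schema validation failed: {e.message}")
except APIResponseValidationError:
    # Generic: after this change, this handler also catches
    # JSONSchemaValidationError if the specific one above is removed.
    print("response validation failed")
```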
main.py

```diff
@@ -933,12 +933,7 @@ def completion(  # type: ignore # noqa: PLR0915
                     "input_cost_per_token": input_cost_per_token,
                     "output_cost_per_token": output_cost_per_token,
                     "litellm_provider": custom_llm_provider,
-                },
-                model: {
-                    "input_cost_per_token": input_cost_per_token,
-                    "output_cost_per_token": output_cost_per_token,
-                    "litellm_provider": custom_llm_provider,
-                },
+                }
             }
         )
     elif (

@@ -951,12 +946,7 @@ def completion(  # type: ignore # noqa: PLR0915
                     "input_cost_per_second": input_cost_per_second,
                     "output_cost_per_second": output_cost_per_second,
                     "litellm_provider": custom_llm_provider,
-                },
-                model: {
-                    "input_cost_per_second": input_cost_per_second,
-                    "output_cost_per_second": output_cost_per_second,
-                    "litellm_provider": custom_llm_provider,
-                },
+                }
             }
         )
     ### BUILD CUSTOM PROMPT TEMPLATE -- IF GIVEN ###

@@ -3331,7 +3321,7 @@ def embedding(  # noqa: PLR0915
     if input_cost_per_token is not None and output_cost_per_token is not None:
         litellm.register_model(
             {
-                model: {
+                f"{custom_llm_provider}/{model}": {
                     "input_cost_per_token": input_cost_per_token,
                     "output_cost_per_token": output_cost_per_token,
                     "litellm_provider": custom_llm_provider,

@@ -3342,7 +3332,7 @@ def embedding(  # noqa: PLR0915
         output_cost_per_second = output_cost_per_second or 0.0
         litellm.register_model(
             {
-                model: {
+                f"{custom_llm_provider}/{model}": {
                     "input_cost_per_second": input_cost_per_second,
                     "output_cost_per_second": output_cost_per_second,
                     "litellm_provider": custom_llm_provider,
```
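The fix replaces the bare `model` key with `f"{custom_llm_provider}/{model}"` when registering custom pricing, so a custom-priced duplicate of a model under another provider no longer overwrites the default entry. A sketch of the resulting behavior (costs are illustrative):

```python
import litellm

# Custom pricing lands only under the provider-qualified key ...
litellm.register_model(
    {
        "openai/claude-3-5-sonnet-20240620": {
            "input_cost_per_token": 0.000003,   # 3$/M, illustrative
            "output_cost_per_token": 0.000015,  # 15$/M, illustrative
            "litellm_provider": "openai",
        }
    }
)

# ... so the default anthropic entry for the same model name is untouched.
info = litellm.get_model_info(model="claude-3-5-sonnet-20240620")
assert info["litellm_provider"] == "anthropic"
```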
```diff
@@ -1,15 +1,19 @@
 model_list:
-  - model_name: gpt-4o
+  - model_name: claude-3-5-sonnet-20240620
     litellm_params:
-      model: openai/fake
-      api_key: fake-key
-      api_base: https://exampleopenaiendpoint-production.up.railway.app/
+      model: claude-3-5-sonnet-20240620
+      api_key: os.environ/ANTHROPIC_API_KEY
+  - model_name: claude-3-5-sonnet-aihubmix
+    litellm_params:
+      model: openai/claude-3-5-sonnet-20240620
+      input_cost_per_token: 0.000003 # 3$/M
+      output_cost_per_token: 0.000015 # 15$/M
+      api_base: "https://exampleopenaiendpoint-production.up.railway.app"
+      api_key: my-fake-key
 
 litellm_settings:
-  callbacks: ["prometheus", "otel"]
-
-general_settings:
-  user_api_key_cache_ttl: 3600
+  fallbacks: [{ "claude-3-5-sonnet-20240620": ["claude-3-5-sonnet-aihubmix"] }]
+  callbacks: ["otel"]
 
 router_settings:
   routing_strategy: latency-based-routing

@@ -19,32 +23,6 @@ router_settings:
     # consider last five minutes of calls for latency calculation
     ttl: 300
 
-  # model_group_alias:
-  #   gpt-4o: gpt-4o-128k-2024-05-13
-  #   gpt-4o-mini: gpt-4o-mini-128k-2024-07-18
-
-  enable_tag_filtering: True
-
-  # retry call 3 times on each model_name (we don't use fallbacks, so this would be 3 times total)
-  num_retries: 3
-
-  # -- cooldown settings --
-  # see https://github.com/BerriAI/litellm/blob/main/litellm/router_utils/cooldown_handlers.py#L265
-
-  # cooldown model if it fails > n calls in a minute.
-  allowed_fails: 2
-
-  # (in seconds) how long to cooldown model if fails/min > allowed_fails
-  cooldown_time: 60
-
-  allowed_fails_policy:
-    InternalServerErrorAllowedFails: 1
-    RateLimitErrorAllowedFails: 2
-    TimeoutErrorAllowedFails: 3
-  # -- end cooldown settings --
-
-  # see https://docs.litellm.ai/docs/proxy/prod#3-use-redis-porthost-password-not-redis_url
   redis_host: os.environ/REDIS_HOST
   redis_port: os.environ/REDIS_PORT
   redis_password: os.environ/REDIS_PASSWORD
```
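With this config, proxy traffic for `claude-3-5-sonnet-20240620` falls back to `claude-3-5-sonnet-aihubmix` on failure. A hypothetical request against a locally running proxy; the URL, port, and key are placeholders:

```python
import requests

# Assumes the proxy was started with this config, e.g. `litellm --config config.yaml`.
resp = requests.post(
    "http://0.0.0.0:4000/chat/completions",  # placeholder proxy address
    headers={"Authorization": "Bearer sk-1234"},  # placeholder key
    json={
        "model": "claude-3-5-sonnet-20240620",
        "messages": [{"role": "user", "content": "Hey, how's it going?"}],
    },
)
print(resp.json())
```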
```diff
@@ -2003,6 +2003,7 @@ def register_model(model_cost: Union[str, dict]):  # noqa: PLR0915
             },
         }
     """
+
     loaded_model_cost = {}
     if isinstance(model_cost, dict):
         loaded_model_cost = model_cost
```
tests/documentation_tests/test_exception_types.py (new file, 81 lines)

```diff
@@ -0,0 +1,81 @@
+import os
+import sys
+import traceback
+
+from dotenv import load_dotenv
+
+load_dotenv()
+import io
+import re
+
+# Backup the original sys.path
+original_sys_path = sys.path.copy()
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path
+import litellm
+
+public_exceptions = litellm.LITELLM_EXCEPTION_TYPES
+# Regular expression to extract the error name
+error_name_pattern = re.compile(r"\.exceptions\.([A-Za-z]+Error)")
+
+# Extract error names from each item
+error_names = {
+    error_name_pattern.search(str(item)).group(1)
+    for item in public_exceptions
+    if error_name_pattern.search(str(item))
+}
+
+
+# sys.path = original_sys_path
+
+
+# Parse the documentation to extract documented keys
+# repo_base = "./"
+repo_base = "../../"
+print(os.listdir(repo_base))
+docs_path = f"{repo_base}/docs/my-website/docs/exception_mapping.md"  # Path to the documentation
+documented_keys = set()
+try:
+    with open(docs_path, "r", encoding="utf-8") as docs_file:
+        content = docs_file.read()
+
+        exceptions_section = re.search(
+            r"## LiteLLM Exceptions(.*?)\n##", content, re.DOTALL
+        )
+        if exceptions_section:
+            # Step 2: Extract the table content
+            table_content = exceptions_section.group(1)
+
+            # Step 3: Create a pattern to capture the Error Types from each row
+            error_type_pattern = re.compile(r"\|\s*[^|]+\s*\|\s*([^\|]+?)\s*\|")
+
+            # Extract the error types
+            exceptions = error_type_pattern.findall(table_content)
+            print(f"exceptions: {exceptions}")
+
+            # Remove extra spaces if any
+            exceptions = [exception.strip() for exception in exceptions]
+
+            print(exceptions)
+            documented_keys.update(exceptions)
+
+except Exception as e:
+    raise Exception(
+        f"Error reading documentation: {e}, \n repo base - {os.listdir(repo_base)}"
+    )
+
+print(documented_keys)
+print(public_exceptions)
+print(error_names)
+
+# Compare and find undocumented keys
+undocumented_keys = error_names - documented_keys
+
+if undocumented_keys:
+    raise Exception(
+        f"\nKeys not documented in 'LiteLLM Exceptions': {undocumented_keys}"
+    )
+else:
+    print("\nAll keys are documented in 'LiteLLM Exceptions'. - {}".format(error_names))
```
```diff
@@ -1337,3 +1337,64 @@ async def test_anthropic_streaming_fallbacks(sync_mode):
     mock_client.assert_called_once()
     print(chunks)
     assert len(chunks) > 0
+
+
+def test_router_fallbacks_with_custom_model_costs():
+    """
+    Tests prod use-case where a custom model is registered with a different provider + custom costs.
+
+    Goal: make sure custom model doesn't override default model costs.
+    """
+    model_list = [
+        {
+            "model_name": "claude-3-5-sonnet-20240620",
+            "litellm_params": {
+                "model": "claude-3-5-sonnet-20240620",
+                "api_key": os.environ["ANTHROPIC_API_KEY"],
+                "input_cost_per_token": 30,
+                "output_cost_per_token": 60,
+            },
+        },
+        {
+            "model_name": "claude-3-5-sonnet-aihubmix",
+            "litellm_params": {
+                "model": "openai/claude-3-5-sonnet-20240620",
+                "input_cost_per_token": 0.000003,  # 3$/M
+                "output_cost_per_token": 0.000015,  # 15$/M
+                "api_base": "https://exampleopenaiendpoint-production.up.railway.app",
+                "api_key": "my-fake-key",
+            },
+        },
+    ]
+
+    router = Router(
+        model_list=model_list,
+        fallbacks=[{"claude-3-5-sonnet-20240620": ["claude-3-5-sonnet-aihubmix"]}],
+    )
+
+    router.completion(
+        model="claude-3-5-sonnet-aihubmix",
+        messages=[{"role": "user", "content": "Hey, how's it going?"}],
+    )
+
+    model_info = litellm.get_model_info(model="claude-3-5-sonnet-20240620")
+
+    print(f"key: {model_info['key']}")
+
+    assert model_info["litellm_provider"] == "anthropic"
+
+    response = router.completion(
+        model="claude-3-5-sonnet-20240620",
+        messages=[{"role": "user", "content": "Hey, how's it going?"}],
+    )
+
+    print(f"response_cost: {response._hidden_params['response_cost']}")
+
+    assert response._hidden_params["response_cost"] > 10
+
+    model_info = litellm.get_model_info(model="claude-3-5-sonnet-20240620")
+
+    print(f"key: {model_info['key']}")
+
+    assert model_info["input_cost_per_token"] == 30
+    assert model_info["output_cost_per_token"] == 60
```