Litellm dev 01 07 2025 p1 (#7618)

* fix(main.py): pass custom llm provider on litellm logging provider update

* fix(cost_calculator.py): don't append provider name to return model if it already carries an llm provider prefix

Fixes https://github.com/BerriAI/litellm/issues/7607

* fix(prometheus_services.py): fix prometheus system health error logging

Fixes https://github.com/BerriAI/litellm/issues/7611
Author: Krish Dholakia
Date: 2025-01-07 21:22:31 -08:00 (committed via GitHub)
Commit: 73094873b2 (parent 09353db365)
5 changed files with 57 additions and 13 deletions

cost_calculator.py:

```diff
@@ -410,7 +410,9 @@ def _select_model_name_for_cost_calc(
     if (
         return_model is not None
         and custom_llm_provider is not None
-        and not return_model.startswith(custom_llm_provider)
+        and not any(
+            return_model.startswith(provider) for provider in litellm.provider_list
+        )
     ):  # add provider prefix if not already present, to match model_cost
         if region_name is not None:
             return_model = f"{custom_llm_provider}/{region_name}/{return_model}"
@@ -538,6 +540,7 @@ def completion_cost(  # noqa: PLR0915
         custom_pricing=custom_pricing,
         base_model=base_model,
     )
     if completion_response is not None and (
         isinstance(completion_response, BaseModel)
         or isinstance(completion_response, dict)
```
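
A quick sketch of what the widened check in the first hunk buys (the model and provider values below are hypothetical, not taken from the linked issue): a return model that already carries some other provider's prefix is no longer re-prefixed with `custom_llm_provider`.

```python
import litellm

# Hypothetical inputs: the model already carries the "openai/" prefix,
# but the current call's provider is "azure".
return_model = "openai/text-embedding-3-small"
custom_llm_provider = "azure"

# Old check: return_model.startswith("azure") is False, so "azure/" would be
# prepended, producing "azure/openai/text-embedding-3-small", which does not
# match any model_cost entry.
# New check: any known provider prefix counts, so the model is left untouched.
already_prefixed = any(
    return_model.startswith(provider) for provider in litellm.provider_list
)
print(already_prefixed)  # True -> no extra "azure/" prefix is added
```
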

prometheus_services.py:

```diff
@@ -9,6 +9,8 @@
 from litellm._logging import print_verbose, verbose_logger
 from litellm.types.integrations.prometheus import LATENCY_BUCKETS
 from litellm.types.services import ServiceLoggerPayload, ServiceTypes
 
+FAILED_REQUESTS_LABELS = ["error_class", "function_name"]
+
 class PrometheusServicesLogger:
     # Class variables or attributes
@@ -44,7 +46,7 @@ class PrometheusServicesLogger:
             counter_failed_request = self.create_counter(
                 service,
                 type_of_request="failed_requests",
-                additional_labels=["error_class", "function_name"],
+                additional_labels=FAILED_REQUESTS_LABELS,
             )
             counter_total_requests = self.create_counter(
                 service, type_of_request="total_requests"
@@ -204,10 +206,17 @@ class PrometheusServicesLogger:
        for obj in prom_objects:
            # increment both failed and total requests
            if isinstance(obj, self.Counter):
-               self.increment_counter(
-                   counter=obj,
-                   labels=payload.service.value,
-                   # log additional_labels=["error_class", "function_name"], used for debugging what's going wrong with the DB
-                   additional_labels=[error_class, function_name],
-                   amount=1,  # LOG ERROR COUNT TO PROMETHEUS
-               )
+               if "failed_requests" in obj._name:
+                   self.increment_counter(
+                       counter=obj,
+                       labels=payload.service.value,
+                       # log additional_labels=["error_class", "function_name"], used for debugging what's going wrong with the DB
+                       additional_labels=[error_class, function_name],
+                       amount=1,  # LOG ERROR COUNT TO PROMETHEUS
+                   )
+               else:
+                   self.increment_counter(
+                       counter=obj,
+                       labels=payload.service.value,
+                       amount=1,  # LOG TOTAL REQUESTS TO PROMETHEUS
+                   )
```
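
The name-based routing above matters because the total-requests counter is created without the extra error labels, and prometheus_client rejects label values that do not match a metric's declared labelnames. A minimal sketch of that failure mode, using illustrative metric names rather than the exact ones litellm generates:

```python
from prometheus_client import Counter

# Illustrative counter, declared the way a "total_requests" counter would be:
# only a service label, no error_class / function_name.
total_requests = Counter(
    "example_postgres_total_requests",
    "Total requests made to the service",
    labelnames=["service"],
)

total_requests.labels("postgres").inc()  # fine: one declared label, one value

try:
    # Supplying the error labels to a counter that never declared them
    # raises ValueError("Incorrect label count") in prometheus_client.
    total_requests.labels("postgres", "TimeoutError", "get_data").inc()
except ValueError as err:
    print(f"rejected by prometheus_client: {err}")
```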

main.py:

```diff
@@ -3274,6 +3274,7 @@ def embedding(  # noqa: PLR0915
             "stream_response": {},
             "cooldown_time": cooldown_time,
         },
+        custom_llm_provider=custom_llm_provider,
     )
     if azure is True or custom_llm_provider == "azure":
         # azure configs
```
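
With `custom_llm_provider` now forwarded to the logging object, the cost of an embedding call can be resolved against the provider-prefixed pricing entry. A rough usage sketch (the credentials and the `_hidden_params` read follow the usual litellm pattern and are not part of this commit):

```python
import os

import litellm

# Requires a real Azure OpenAI embedding deployment to actually run.
response = litellm.embedding(
    model="azure/azure-embedding-model",
    input=["hello world"],
    api_key=os.environ["AZURE_API_KEY"],
    api_base=os.environ["AZURE_API_BASE"],
)

# litellm attaches the computed cost to the response's hidden params.
print(response._hidden_params.get("response_cost"))
```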

Proxy config (YAML):

```diff
@@ -1,8 +1,9 @@
 model_list:
   - model_name: azure-embedding-model
     litellm_params:
-      model: openai/gpt-3.5-turbo
-      api_key: os.environ/OPENAI_API_KEY
+      model: azure/azure-embedding-model
+      api_key: os.environ/AZURE_API_KEY
+      api_base: os.environ/AZURE_API_BASE
   - model_name: openai-text-completion
     litellm_params:
       model: openai/gpt-3.5-turbo
@@ -17,5 +18,8 @@ model_list:
       model: openai/gpt-3.5-turbo
       api_key: os.environ/OPENAI_API_KEY
 
-# litellm_settings:
-#   callbacks: ["otel"]
+litellm_settings:
+  service_callback: ["prometheus_system"]
+  callbacks: ["prometheus"]
+  cache: true
```
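
For reference, a minimal way to exercise the updated config through the proxy from Python. This assumes the proxy is already running locally on its default port 4000 with a master key of "sk-1234"; both values are assumptions, not part of this commit:

```python
from openai import OpenAI

# Point the OpenAI SDK at the running litellm proxy.
client = OpenAI(base_url="http://localhost:4000", api_key="sk-1234")

response = client.embeddings.create(
    model="azure-embedding-model",  # the model_name from the config above
    input=["hello world"],
)
print(response.usage)
```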

Cost calculator tests:

```diff
@@ -2702,3 +2702,30 @@ def test_select_model_name_for_cost_calc():
     return_model = _select_model_name_for_cost_calc(**args)
     assert return_model == "azure_ai/mistral-large"
 
+
+def test_cost_calculator_azure_embedding():
+    from litellm.cost_calculator import response_cost_calculator
+    from litellm.types.utils import EmbeddingResponse, Usage
+
+    kwargs = {
+        "response_object": EmbeddingResponse(
+            model="text-embedding-3-small",
+            data=[{"embedding": [1, 2, 3]}],
+            usage=Usage(prompt_tokens=10, completion_tokens=10),
+        ),
+        "model": "text-embedding-3-small",
+        "cache_hit": None,
+        "custom_llm_provider": None,
+        "base_model": "azure/text-embedding-3-small",
+        "call_type": "aembedding",
+        "optional_params": {},
+        "custom_pricing": False,
+        "prompt": "Hello, world!",
+    }
+
+    try:
+        response_cost_calculator(**kwargs)
+    except Exception as e:
+        traceback.print_exc()
+        pytest.fail(f"Error: {e}")
```
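
One way to run just the new test (a sketch; the test file's path is not shown in this view, so invoke it from wherever the cost-calculation tests live):

```python
import pytest

# Select the new test by name; append the test file path if needed.
pytest.main(["-q", "-k", "test_cost_calculator_azure_embedding"])
```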