Litellm dev 01 07 2025 p1 (#7618)

* fix(main.py): pass custom llm provider on litellm logging provider update

* fix(cost_calculator.py): don't append provider name to return model if existing llm provider

Fixes https://github.com/BerriAI/litellm/issues/7607

* fix(prometheus_services.py): fix prometheus system health error logging

Fixes https://github.com/BerriAI/litellm/issues/7611
Krish Dholakia authored 2025-01-07 21:22:31 -08:00 (committed by GitHub)
parent 09353db365
commit 73094873b2
5 changed files with 57 additions and 13 deletions

litellm/cost_calculator.py

@@ -410,7 +410,9 @@ def _select_model_name_for_cost_calc(
     if (
         return_model is not None
         and custom_llm_provider is not None
-        and not return_model.startswith(custom_llm_provider)
+        and not any(
+            return_model.startswith(provider) for provider in litellm.provider_list
+        )
     ):  # add provider prefix if not already present, to match model_cost
         if region_name is not None:
             return_model = f"{custom_llm_provider}/{region_name}/{return_model}"
@@ -538,6 +540,7 @@ def completion_cost(  # noqa: PLR0915
             custom_pricing=custom_pricing,
             base_model=base_model,
         )
         if completion_response is not None and (
             isinstance(completion_response, BaseModel)
             or isinstance(completion_response, dict)
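
To make the first hunk concrete, here is a minimal standalone sketch of the changed condition, assuming a trimmed-down provider list (the real check uses litellm.provider_list and also handles region_name); it is not the actual _select_model_name_for_cost_calc implementation:

provider_list = ["azure", "azure_ai", "openai"]  # trimmed for illustration

def add_provider_prefix(return_model: str, custom_llm_provider: str) -> str:
    # Old check: not return_model.startswith(custom_llm_provider), so a model
    # already prefixed with a *different* provider was re-prefixed.
    # New check: skip the prefix if any known provider already prefixes it.
    if not any(return_model.startswith(p) for p in provider_list):
        return f"{custom_llm_provider}/{return_model}"
    return return_model

# With the old check, a base_model of "azure/text-embedding-3-small" paired
# with custom_llm_provider "openai" was re-prefixed to
# "openai/azure/text-embedding-3-small", which misses the model_cost lookup.
assert add_provider_prefix("azure/text-embedding-3-small", "openai") == "azure/text-embedding-3-small"
assert add_provider_prefix("text-embedding-3-small", "azure") == "azure/text-embedding-3-small"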

litellm/integrations/prometheus_services.py

@@ -9,6 +9,8 @@ from litellm._logging import print_verbose, verbose_logger
 from litellm.types.integrations.prometheus import LATENCY_BUCKETS
 from litellm.types.services import ServiceLoggerPayload, ServiceTypes
 
+FAILED_REQUESTS_LABELS = ["error_class", "function_name"]
+
 
 class PrometheusServicesLogger:
     # Class variables or attributes
@@ -44,7 +46,7 @@ class PrometheusServicesLogger:
                 counter_failed_request = self.create_counter(
                     service,
                     type_of_request="failed_requests",
-                    additional_labels=["error_class", "function_name"],
+                    additional_labels=FAILED_REQUESTS_LABELS,
                 )
                 counter_total_requests = self.create_counter(
                     service, type_of_request="total_requests"
@@ -204,6 +206,7 @@ class PrometheusServicesLogger:
         for obj in prom_objects:
             # increment both failed and total requests
             if isinstance(obj, self.Counter):
+                if "failed_requests" in obj._name:
                     self.increment_counter(
                         counter=obj,
                         labels=payload.service.value,
@@ -211,3 +214,9 @@ class PrometheusServicesLogger:
                         additional_labels=[error_class, function_name],
                         amount=1,  # LOG ERROR COUNT TO PROMETHEUS
                     )
+                else:
+                    self.increment_counter(
+                        counter=obj,
+                        labels=payload.service.value,
+                        amount=1,  # LOG TOTAL REQUESTS TO PROMETHEUS
+                    )
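
A minimal sketch of the counter routing these hunks introduce, written against prometheus_client directly; the metric names and the record_failure helper are illustrative, not litellm's:

from prometheus_client import Counter

FAILED_REQUESTS_LABELS = ["error_class", "function_name"]

failed = Counter("service_failed_requests", "failed service requests",
                 labelnames=["service"] + FAILED_REQUESTS_LABELS)
total = Counter("service_total_requests", "total service requests",
                labelnames=["service"])

def record_failure(service: str, error_class: str, function_name: str) -> None:
    # On error, bump both counters, but only the failed-requests counter
    # takes the extra error_class/function_name labels. The total counter
    # was created with just the service label, and prometheus_client raises
    # ValueError on a label-count mismatch, which is why the old loop that
    # passed the extra labels to every counter broke error logging.
    for obj in (failed, total):
        if "failed_requests" in obj._name:
            obj.labels(service, error_class, function_name).inc()
        else:
            obj.labels(service).inc()

record_failure("postgres", "TimeoutError", "get_data")

Extracting FAILED_REQUESTS_LABELS into a module-level constant keeps counter creation and incrementing in sync, which is what the first hunk does.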

litellm/main.py

@@ -3274,6 +3274,7 @@ def embedding(  # noqa: PLR0915
             "stream_response": {},
             "cooldown_time": cooldown_time,
         },
+        custom_llm_provider=custom_llm_provider,
     )
     if azure is True or custom_llm_provider == "azure":
         # azure configs
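
The one-line main.py change threads the already-resolved provider into the logging object's update call instead of leaving it to be re-inferred downstream. A toy sketch of the pattern (MiniLogging is hypothetical, not litellm's Logging class):

from typing import Optional

class MiniLogging:
    def __init__(self) -> None:
        self.model_call_details: dict = {}

    def update_environment_variables(
        self, *, litellm_params: dict, custom_llm_provider: Optional[str] = None
    ) -> None:
        self.model_call_details["litellm_params"] = litellm_params
        # Recording the provider here lets downstream cost tracking and
        # callbacks read it directly instead of re-deriving it from the
        # model string.
        self.model_call_details["custom_llm_provider"] = custom_llm_provider

logger = MiniLogging()
logger.update_environment_variables(
    litellm_params={"stream_response": {}, "cooldown_time": None},
    custom_llm_provider="azure",
)
assert logger.model_call_details["custom_llm_provider"] == "azure"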

litellm/proxy/proxy_config.yaml

@@ -1,8 +1,9 @@
 model_list:
   - model_name: azure-embedding-model
     litellm_params:
-      model: openai/gpt-3.5-turbo
-      api_key: os.environ/OPENAI_API_KEY
+      model: azure/azure-embedding-model
+      api_key: os.environ/AZURE_API_KEY
+      api_base: os.environ/AZURE_API_BASE
   - model_name: openai-text-completion
     litellm_params:
       model: openai/gpt-3.5-turbo
@@ -17,5 +18,8 @@ model_list:
       model: openai/gpt-3.5-turbo
       api_key: os.environ/OPENAI_API_KEY
 
-# litellm_settings:
-#   callbacks: ["otel"]
+litellm_settings:
+  service_callback: ["prometheus_system"]
+  callbacks: ["prometheus"]
+  cache: true
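
A hedged usage sketch for the updated config: an embedding request against a locally running proxy (started with litellm --config pointing at this file, default port 4000); the api_key value is a placeholder for whatever master key the proxy is configured with:

import openai

client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

resp = client.embeddings.create(
    model="azure-embedding-model",  # routed via the model_list entry above
    input=["hello world"],
)
print(resp.usage)  # token counts feed the cost calculation fixed in this commit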

tests/local_testing/test_completion_cost.py

@@ -2702,3 +2702,30 @@ def test_select_model_name_for_cost_calc():
     return_model = _select_model_name_for_cost_calc(**args)
     assert return_model == "azure_ai/mistral-large"
 
+
+def test_cost_calculator_azure_embedding():
+    from litellm.cost_calculator import response_cost_calculator
+    from litellm.types.utils import EmbeddingResponse, Usage
+
+    kwargs = {
+        "response_object": EmbeddingResponse(
+            model="text-embedding-3-small",
+            data=[{"embedding": [1, 2, 3]}],
+            usage=Usage(prompt_tokens=10, completion_tokens=10),
+        ),
+        "model": "text-embedding-3-small",
+        "cache_hit": None,
+        "custom_llm_provider": None,
+        "base_model": "azure/text-embedding-3-small",
+        "call_type": "aembedding",
+        "optional_params": {},
+        "custom_pricing": False,
+        "prompt": "Hello, world!",
+    }
+
+    try:
+        response_cost_calculator(**kwargs)
+    except Exception as e:
+        traceback.print_exc()
+        pytest.fail(f"Error: {e}")