Litellm dev 01 07 2025 p1 (#7618)

* fix(main.py): pass custom llm provider on litellm logging provider update

* fix(cost_calculator.py): don't append provider name to return model if it already carries an llm provider prefix

Fixes https://github.com/BerriAI/litellm/issues/7607

* fix(prometheus_services.py): fix prometheus system health error logging

Fixes https://github.com/BerriAI/litellm/issues/7611
Author: Krish Dholakia
Date: 2025-01-07 21:22:31 -08:00 (committed via GitHub)
Commit: 73094873b2 (parent 09353db365)
5 changed files with 57 additions and 13 deletions

cost_calculator.py:

```diff
@@ -410,7 +410,9 @@ def _select_model_name_for_cost_calc(
     if (
         return_model is not None
         and custom_llm_provider is not None
-        and not return_model.startswith(custom_llm_provider)
+        and not any(
+            return_model.startswith(provider) for provider in litellm.provider_list
+        )
     ):  # add provider prefix if not already present, to match model_cost
         if region_name is not None:
             return_model = f"{custom_llm_provider}/{region_name}/{return_model}"
@@ -538,6 +540,7 @@ def completion_cost(  # noqa: PLR0915
         custom_pricing=custom_pricing,
         base_model=base_model,
     )
     if completion_response is not None and (
         isinstance(completion_response, BaseModel)
         or isinstance(completion_response, dict)
```
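
A quick sketch of what the widened check in the first hunk buys (the model and provider values below are hypothetical, not taken from the linked issue): a return model that already carries some other provider's prefix is no longer re-prefixed with `custom_llm_provider`.

```python
import litellm

# Hypothetical inputs: the model already carries the "openai/" prefix,
# but the current call's provider is "azure".
return_model = "openai/text-embedding-3-small"
custom_llm_provider = "azure"

# Old check: return_model.startswith("azure") is False, so "azure/" would be
# prepended, producing "azure/openai/text-embedding-3-small", which does not
# match any model_cost entry.
# New check: any known provider prefix counts, so the model is left untouched.
already_prefixed = any(
    return_model.startswith(provider) for provider in litellm.provider_list
)
print(already_prefixed)  # True -> no extra "azure/" prefix is added
```
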

prometheus_services.py:

```diff
@@ -9,6 +9,8 @@
 from litellm._logging import print_verbose, verbose_logger
 from litellm.types.integrations.prometheus import LATENCY_BUCKETS
 from litellm.types.services import ServiceLoggerPayload, ServiceTypes
 
+FAILED_REQUESTS_LABELS = ["error_class", "function_name"]
+
 class PrometheusServicesLogger:
     # Class variables or attributes
@@ -44,7 +46,7 @@ class PrometheusServicesLogger:
             counter_failed_request = self.create_counter(
                 service,
                 type_of_request="failed_requests",
-                additional_labels=["error_class", "function_name"],
+                additional_labels=FAILED_REQUESTS_LABELS,
             )
             counter_total_requests = self.create_counter(
                 service, type_of_request="total_requests"
@@ -204,10 +206,17 @@ class PrometheusServicesLogger:
        for obj in prom_objects:
            # increment both failed and total requests
            if isinstance(obj, self.Counter):
-               self.increment_counter(
-                   counter=obj,
-                   labels=payload.service.value,
-                   # log additional_labels=["error_class", "function_name"], used for debugging what's going wrong with the DB
-                   additional_labels=[error_class, function_name],
-                   amount=1,  # LOG ERROR COUNT TO PROMETHEUS
-               )
+               if "failed_requests" in obj._name:
+                   self.increment_counter(
+                       counter=obj,
+                       labels=payload.service.value,
+                       # log additional_labels=["error_class", "function_name"], used for debugging what's going wrong with the DB
+                       additional_labels=[error_class, function_name],
+                       amount=1,  # LOG ERROR COUNT TO PROMETHEUS
+                   )
+               else:
+                   self.increment_counter(
+                       counter=obj,
+                       labels=payload.service.value,
+                       amount=1,  # LOG TOTAL REQUESTS TO PROMETHEUS
+                   )
```
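
The name-based routing above matters because the total-requests counter is created without the extra error labels, and prometheus_client rejects label values that do not match a metric's declared labelnames. A minimal sketch of that failure mode, using illustrative metric names rather than the exact ones litellm generates:

```python
from prometheus_client import Counter

# Illustrative counter, declared the way a "total_requests" counter would be:
# only a service label, no error_class / function_name.
total_requests = Counter(
    "example_postgres_total_requests",
    "Total requests made to the service",
    labelnames=["service"],
)

total_requests.labels("postgres").inc()  # fine: one declared label, one value

try:
    # Supplying the error labels to a counter that never declared them
    # raises ValueError("Incorrect label count") in prometheus_client.
    total_requests.labels("postgres", "TimeoutError", "get_data").inc()
except ValueError as err:
    print(f"rejected by prometheus_client: {err}")
```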

main.py:

```diff
@@ -3274,6 +3274,7 @@ def embedding(  # noqa: PLR0915
             "stream_response": {},
             "cooldown_time": cooldown_time,
         },
+        custom_llm_provider=custom_llm_provider,
     )
     if azure is True or custom_llm_provider == "azure":
         # azure configs
```
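
With `custom_llm_provider` now forwarded to the logging object, the cost of an embedding call can be resolved against the provider-prefixed pricing entry. A rough usage sketch (the credentials and the `_hidden_params` read follow the usual litellm pattern and are not part of this commit):

```python
import os

import litellm

# Requires a real Azure OpenAI embedding deployment to actually run.
response = litellm.embedding(
    model="azure/azure-embedding-model",
    input=["hello world"],
    api_key=os.environ["AZURE_API_KEY"],
    api_base=os.environ["AZURE_API_BASE"],
)

# litellm attaches the computed cost to the response's hidden params.
print(response._hidden_params.get("response_cost"))
```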

Proxy config (YAML):

```diff
@@ -1,8 +1,9 @@
 model_list:
   - model_name: azure-embedding-model
     litellm_params:
-      model: openai/gpt-3.5-turbo
-      api_key: os.environ/OPENAI_API_KEY
+      model: azure/azure-embedding-model
+      api_key: os.environ/AZURE_API_KEY
+      api_base: os.environ/AZURE_API_BASE
   - model_name: openai-text-completion
     litellm_params:
       model: openai/gpt-3.5-turbo
@@ -17,5 +18,8 @@ model_list:
       model: openai/gpt-3.5-turbo
       api_key: os.environ/OPENAI_API_KEY
 
-# litellm_settings:
-#   callbacks: ["otel"]
+litellm_settings:
+  service_callback: ["prometheus_system"]
+  callbacks: ["prometheus"]
+  cache: true
```
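
For reference, a minimal way to exercise the updated config through the proxy from Python. This assumes the proxy is already running locally on its default port 4000 with a master key of "sk-1234"; both values are assumptions, not part of this commit:

```python
from openai import OpenAI

# Point the OpenAI SDK at the running litellm proxy.
client = OpenAI(base_url="http://localhost:4000", api_key="sk-1234")

response = client.embeddings.create(
    model="azure-embedding-model",  # the model_name from the config above
    input=["hello world"],
)
print(response.usage)
```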

Cost calculator tests:

```diff
@@ -2702,3 +2702,30 @@ def test_select_model_name_for_cost_calc():
     return_model = _select_model_name_for_cost_calc(**args)
     assert return_model == "azure_ai/mistral-large"
 
+
+def test_cost_calculator_azure_embedding():
+    from litellm.cost_calculator import response_cost_calculator
+    from litellm.types.utils import EmbeddingResponse, Usage
+
+    kwargs = {
+        "response_object": EmbeddingResponse(
+            model="text-embedding-3-small",
+            data=[{"embedding": [1, 2, 3]}],
+            usage=Usage(prompt_tokens=10, completion_tokens=10),
+        ),
+        "model": "text-embedding-3-small",
+        "cache_hit": None,
+        "custom_llm_provider": None,
+        "base_model": "azure/text-embedding-3-small",
+        "call_type": "aembedding",
+        "optional_params": {},
+        "custom_pricing": False,
+        "prompt": "Hello, world!",
+    }
+
+    try:
+        response_cost_calculator(**kwargs)
+    except Exception as e:
+        traceback.print_exc()
+        pytest.fail(f"Error: {e}")
```
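
One way to run just the new test (a sketch; the test file's path is not shown in this view, so invoke it from wherever the cost-calculation tests live):

```python
import pytest

# Select the new test by name; append the test file path if needed.
pytest.main(["-q", "-k", "test_cost_calculator_azure_embedding"])
```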