mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-26 19:24:27 +00:00
Litellm dev 01 07 2025 p1 (#7618)
* fix(main.py): pass custom llm provider on litellm logging provider update
* fix(cost_calculator.py): don't append provider name to return model if existing llm provider. Fixes https://github.com/BerriAI/litellm/issues/7607
* fix(prometheus_services.py): fix prometheus system health error logging. Fixes https://github.com/BerriAI/litellm/issues/7611
This commit is contained in:
parent
09353db365
commit
73094873b2
5 changed files with 57 additions and 13 deletions
|
@ -410,7 +410,9 @@ def _select_model_name_for_cost_calc(
|
||||||
if (
|
if (
|
||||||
return_model is not None
|
return_model is not None
|
||||||
and custom_llm_provider is not None
|
and custom_llm_provider is not None
|
||||||
and not return_model.startswith(custom_llm_provider)
|
and not any(
|
||||||
|
return_model.startswith(provider) for provider in litellm.provider_list
|
||||||
|
)
|
||||||
): # add provider prefix if not already present, to match model_cost
|
): # add provider prefix if not already present, to match model_cost
|
||||||
if region_name is not None:
|
if region_name is not None:
|
||||||
return_model = f"{custom_llm_provider}/{region_name}/{return_model}"
|
return_model = f"{custom_llm_provider}/{region_name}/{return_model}"
|
||||||
|
@ -538,6 +540,7 @@ def completion_cost( # noqa: PLR0915
|
||||||
custom_pricing=custom_pricing,
|
custom_pricing=custom_pricing,
|
||||||
base_model=base_model,
|
base_model=base_model,
|
||||||
)
|
)
|
||||||
|
|
||||||
if completion_response is not None and (
|
if completion_response is not None and (
|
||||||
isinstance(completion_response, BaseModel)
|
isinstance(completion_response, BaseModel)
|
||||||
or isinstance(completion_response, dict)
|
or isinstance(completion_response, dict)
|
||||||
|
|
|
@ -9,6 +9,8 @@ from litellm._logging import print_verbose, verbose_logger
|
||||||
from litellm.types.integrations.prometheus import LATENCY_BUCKETS
|
from litellm.types.integrations.prometheus import LATENCY_BUCKETS
|
||||||
from litellm.types.services import ServiceLoggerPayload, ServiceTypes
|
from litellm.types.services import ServiceLoggerPayload, ServiceTypes
|
||||||
|
|
||||||
|
FAILED_REQUESTS_LABELS = ["error_class", "function_name"]
|
||||||
|
|
||||||
|
|
||||||
class PrometheusServicesLogger:
|
class PrometheusServicesLogger:
|
||||||
# Class variables or attributes
|
# Class variables or attributes
|
||||||
|
@ -44,7 +46,7 @@ class PrometheusServicesLogger:
|
||||||
counter_failed_request = self.create_counter(
|
counter_failed_request = self.create_counter(
|
||||||
service,
|
service,
|
||||||
type_of_request="failed_requests",
|
type_of_request="failed_requests",
|
||||||
additional_labels=["error_class", "function_name"],
|
additional_labels=FAILED_REQUESTS_LABELS,
|
||||||
)
|
)
|
||||||
counter_total_requests = self.create_counter(
|
counter_total_requests = self.create_counter(
|
||||||
service, type_of_request="total_requests"
|
service, type_of_request="total_requests"
|
||||||
|
@ -204,6 +206,7 @@ class PrometheusServicesLogger:
|
||||||
for obj in prom_objects:
|
for obj in prom_objects:
|
||||||
# increment both failed and total requests
|
# increment both failed and total requests
|
||||||
if isinstance(obj, self.Counter):
|
if isinstance(obj, self.Counter):
|
||||||
|
if "failed_requests" in obj._name:
|
||||||
self.increment_counter(
|
self.increment_counter(
|
||||||
counter=obj,
|
counter=obj,
|
||||||
labels=payload.service.value,
|
labels=payload.service.value,
|
||||||
|
@ -211,3 +214,9 @@ class PrometheusServicesLogger:
|
||||||
additional_labels=[error_class, function_name],
|
additional_labels=[error_class, function_name],
|
||||||
amount=1, # LOG ERROR COUNT TO PROMETHEUS
|
amount=1, # LOG ERROR COUNT TO PROMETHEUS
|
||||||
)
|
)
|
||||||
|
else:
|
||||||
|
self.increment_counter(
|
||||||
|
counter=obj,
|
||||||
|
labels=payload.service.value,
|
||||||
|
amount=1, # LOG TOTAL REQUESTS TO PROMETHEUS
|
||||||
|
)
|
||||||
|
|
|
@ -3274,6 +3274,7 @@ def embedding( # noqa: PLR0915
|
||||||
"stream_response": {},
|
"stream_response": {},
|
||||||
"cooldown_time": cooldown_time,
|
"cooldown_time": cooldown_time,
|
||||||
},
|
},
|
||||||
|
custom_llm_provider=custom_llm_provider,
|
||||||
)
|
)
|
||||||
if azure is True or custom_llm_provider == "azure":
|
if azure is True or custom_llm_provider == "azure":
|
||||||
# azure configs
|
# azure configs
|
||||||
|
|
|
@ -1,8 +1,9 @@
|
||||||
model_list:
|
model_list:
|
||||||
- model_name: azure-embedding-model
|
- model_name: azure-embedding-model
|
||||||
litellm_params:
|
litellm_params:
|
||||||
model: openai/gpt-3.5-turbo
|
model: azure/azure-embedding-model
|
||||||
api_key: os.environ/OPENAI_API_KEY
|
api_key: os.environ/AZURE_API_KEY
|
||||||
|
api_base: os.environ/AZURE_API_BASE
|
||||||
- model_name: openai-text-completion
|
- model_name: openai-text-completion
|
||||||
litellm_params:
|
litellm_params:
|
||||||
model: openai/gpt-3.5-turbo
|
model: openai/gpt-3.5-turbo
|
||||||
|
@ -17,5 +18,8 @@ model_list:
|
||||||
model: openai/gpt-3.5-turbo
|
model: openai/gpt-3.5-turbo
|
||||||
api_key: os.environ/OPENAI_API_KEY
|
api_key: os.environ/OPENAI_API_KEY
|
||||||
|
|
||||||
# litellm_settings:
|
|
||||||
# callbacks: ["otel"]
|
litellm_settings:
|
||||||
|
service_callback: ["prometheus_system"]
|
||||||
|
callbacks: ["prometheus"]
|
||||||
|
cache: true
|
|
@ -2702,3 +2702,30 @@ def test_select_model_name_for_cost_calc():
|
||||||
|
|
||||||
return_model = _select_model_name_for_cost_calc(**args)
|
return_model = _select_model_name_for_cost_calc(**args)
|
||||||
assert return_model == "azure_ai/mistral-large"
|
assert return_model == "azure_ai/mistral-large"
|
||||||
|
|
||||||
|
|
||||||
|
def test_cost_calculator_azure_embedding():
    """Ensure `response_cost_calculator` does not raise for an embedding response.

    The call passes a model name without a provider prefix
    ("text-embedding-3-small"), `custom_llm_provider=None`, and a
    `base_model` that already carries the "azure/" prefix. The test only
    checks that the call completes; any exception is printed and reported
    as a test failure.
    """
    from litellm.cost_calculator import response_cost_calculator
    from litellm.types.utils import EmbeddingResponse, Usage

    # Build the response object up front so the call arguments read flat.
    embedding_response = EmbeddingResponse(
        model="text-embedding-3-small",
        data=[{"embedding": [1, 2, 3]}],
        usage=Usage(prompt_tokens=10, completion_tokens=10),
    )

    call_args = {
        "response_object": embedding_response,
        "model": "text-embedding-3-small",
        "cache_hit": None,
        "custom_llm_provider": None,
        "base_model": "azure/text-embedding-3-small",
        "call_type": "aembedding",
        "optional_params": {},
        "custom_pricing": False,
        "prompt": "Hello, world!",
    }

    try:
        response_cost_calculator(**call_args)
    except Exception as err:
        # Print the full traceback before failing so the root cause is visible.
        traceback.print_exc()
        pytest.fail(f"Error: {err}")
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue