diff --git a/litellm/cost_calculator.py b/litellm/cost_calculator.py
index 774ee96977..284ae16c56 100644
--- a/litellm/cost_calculator.py
+++ b/litellm/cost_calculator.py
@@ -410,7 +410,9 @@ def _select_model_name_for_cost_calc(
     if (
         return_model is not None
         and custom_llm_provider is not None
-        and not return_model.startswith(custom_llm_provider)
+        and not any(
+            return_model.startswith(provider) for provider in litellm.provider_list
+        )
     ):  # add provider prefix if not already present, to match model_cost
         if region_name is not None:
             return_model = f"{custom_llm_provider}/{region_name}/{return_model}"
@@ -538,6 +540,7 @@ def completion_cost(  # noqa: PLR0915
            custom_pricing=custom_pricing,
            base_model=base_model,
        )
+
        if completion_response is not None and (
            isinstance(completion_response, BaseModel)
            or isinstance(completion_response, dict)
diff --git a/litellm/integrations/prometheus_services.py b/litellm/integrations/prometheus_services.py
index cea606c245..4bf293fb01 100644
--- a/litellm/integrations/prometheus_services.py
+++ b/litellm/integrations/prometheus_services.py
@@ -9,6 +9,8 @@ from litellm._logging import print_verbose, verbose_logger
 from litellm.types.integrations.prometheus import LATENCY_BUCKETS
 from litellm.types.services import ServiceLoggerPayload, ServiceTypes
 
+FAILED_REQUESTS_LABELS = ["error_class", "function_name"]
+
 
 class PrometheusServicesLogger:
     # Class variables or attributes
@@ -44,7 +46,7 @@ class PrometheusServicesLogger:
                counter_failed_request = self.create_counter(
                    service,
                    type_of_request="failed_requests",
-                    additional_labels=["error_class", "function_name"],
+                    additional_labels=FAILED_REQUESTS_LABELS,
                )
                counter_total_requests = self.create_counter(
                    service, type_of_request="total_requests"
@@ -204,10 +206,17 @@ class PrometheusServicesLogger:
            for obj in prom_objects:
                # increment both failed and total requests
                if isinstance(obj, self.Counter):
-                    self.increment_counter(
-                        counter=obj,
-                        labels=payload.service.value,
-                        # log additional_labels=["error_class", "function_name"], used for debugging what's going wrong with the DB
-                        additional_labels=[error_class, function_name],
-                        amount=1,  # LOG ERROR COUNT TO PROMETHEUS
-                    )
+                    if "failed_requests" in obj._name:
+                        self.increment_counter(
+                            counter=obj,
+                            labels=payload.service.value,
+                            # log additional_labels=["error_class", "function_name"], used for debugging what's going wrong with the DB
+                            additional_labels=[error_class, function_name],
+                            amount=1,  # LOG ERROR COUNT TO PROMETHEUS
+                        )
+                    else:
+                        self.increment_counter(
+                            counter=obj,
+                            labels=payload.service.value,
+                            amount=1,  # LOG TOTAL REQUESTS TO PROMETHEUS
+                        )
diff --git a/litellm/main.py b/litellm/main.py
index ba6d9a7249..054418e909 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -3274,6 +3274,7 @@ def embedding(  # noqa: PLR0915
            "stream_response": {},
            "cooldown_time": cooldown_time,
        },
+        custom_llm_provider=custom_llm_provider,
    )
    if azure is True or custom_llm_provider == "azure":
        # azure configs
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index 3222e69d86..91dca93acf 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -1,8 +1,9 @@
 model_list:
   - model_name: azure-embedding-model
     litellm_params:
-      model: openai/gpt-3.5-turbo
-      api_key: os.environ/OPENAI_API_KEY
+      model: azure/azure-embedding-model
+      api_key: os.environ/AZURE_API_KEY
+      api_base: os.environ/AZURE_API_BASE
   - model_name: openai-text-completion
     litellm_params:
       model: openai/gpt-3.5-turbo
@@ -17,5 +18,8 @@ model_list:
       model: openai/gpt-3.5-turbo
       api_key: os.environ/OPENAI_API_KEY
 
-# litellm_settings:
-#   callbacks: ["otel"]
\ No newline at end of file
+
+litellm_settings:
+  service_callback: ["prometheus_system"]
+  callbacks: ["prometheus"]
+  cache: true
\ No newline at end of file
diff --git a/tests/local_testing/test_completion_cost.py b/tests/local_testing/test_completion_cost.py
index 6b24e8edbb..585eac0c2a 100644
--- a/tests/local_testing/test_completion_cost.py
+++ b/tests/local_testing/test_completion_cost.py
@@ -2702,3 +2702,30 @@ def test_select_model_name_for_cost_calc():
     return_model = _select_model_name_for_cost_calc(**args)
 
     assert return_model == "azure_ai/mistral-large"
+
+
+def test_cost_calculator_azure_embedding():
+    from litellm.cost_calculator import response_cost_calculator
+    from litellm.types.utils import EmbeddingResponse, Usage
+
+    kwargs = {
+        "response_object": EmbeddingResponse(
+            model="text-embedding-3-small",
+            data=[{"embedding": [1, 2, 3]}],
+            usage=Usage(prompt_tokens=10, completion_tokens=10),
+        ),
+        "model": "text-embedding-3-small",
+        "cache_hit": None,
+        "custom_llm_provider": None,
+        "base_model": "azure/text-embedding-3-small",
+        "call_type": "aembedding",
+        "optional_params": {},
+        "custom_pricing": False,
+        "prompt": "Hello, world!",
+    }
+
+    try:
+        response_cost_calculator(**kwargs)
+    except Exception as e:
+        traceback.print_exc()
+        pytest.fail(f"Error: {e}")
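
Note: the snippet below is a minimal, illustrative sketch of the provider-prefix check introduced in the cost_calculator.py hunk above; it is not part of the patch. It assumes litellm.provider_list is a flat list of provider name strings (as referenced in the diff), and has_provider_prefix is a hypothetical helper written only to demonstrate the behaviour.

import litellm

def has_provider_prefix(model_name: str) -> bool:
    # Hypothetical helper mirroring the new condition: a model name counts as
    # already prefixed if it starts with ANY known provider, not only the
    # current custom_llm_provider.
    return any(model_name.startswith(provider) for provider in litellm.provider_list)

# Expected behaviour (assuming "azure" is present in litellm.provider_list):
print(has_provider_prefix("azure/text-embedding-3-small"))  # True  -> no extra prefix added
print(has_provider_prefix("text-embedding-3-small"))        # False -> caller adds the custom_llm_provider prefix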