diff --git a/litellm/cost_calculator.py b/litellm/cost_calculator.py
index 774ee96977..284ae16c56 100644
--- a/litellm/cost_calculator.py
+++ b/litellm/cost_calculator.py
@@ -410,7 +410,9 @@ def _select_model_name_for_cost_calc(
     if (
         return_model is not None
         and custom_llm_provider is not None
-        and not return_model.startswith(custom_llm_provider)
+        and not any(
+            return_model.startswith(provider) for provider in litellm.provider_list
+        )
     ):  # add provider prefix if not already present, to match model_cost
         if region_name is not None:
             return_model = f"{custom_llm_provider}/{region_name}/{return_model}"
@@ -538,6 +540,7 @@ def completion_cost(  # noqa: PLR0915
            custom_pricing=custom_pricing,
            base_model=base_model,
        )
+
        if completion_response is not None and (
            isinstance(completion_response, BaseModel)
            or isinstance(completion_response, dict)
diff --git a/litellm/integrations/prometheus_services.py b/litellm/integrations/prometheus_services.py
index cea606c245..4bf293fb01 100644
--- a/litellm/integrations/prometheus_services.py
+++ b/litellm/integrations/prometheus_services.py
@@ -9,6 +9,8 @@ from litellm._logging import print_verbose, verbose_logger
 from litellm.types.integrations.prometheus import LATENCY_BUCKETS
 from litellm.types.services import ServiceLoggerPayload, ServiceTypes
 
+FAILED_REQUESTS_LABELS = ["error_class", "function_name"]
+
 
 class PrometheusServicesLogger:
     # Class variables or attributes
@@ -44,7 +46,7 @@ class PrometheusServicesLogger:
                counter_failed_request = self.create_counter(
                    service,
                    type_of_request="failed_requests",
-                    additional_labels=["error_class", "function_name"],
+                    additional_labels=FAILED_REQUESTS_LABELS,
                )
                counter_total_requests = self.create_counter(
                    service, type_of_request="total_requests"
@@ -204,10 +206,17 @@ class PrometheusServicesLogger:
            for obj in prom_objects:
                # increment both failed and total requests
                if isinstance(obj, self.Counter):
-                    self.increment_counter(
-                        counter=obj,
-                        labels=payload.service.value,
-                        # log additional_labels=["error_class", "function_name"], used for debugging what's going wrong with the DB
-                        additional_labels=[error_class, function_name],
-                        amount=1,  # LOG ERROR COUNT TO PROMETHEUS
-                    )
+                    if "failed_requests" in obj._name:
+                        self.increment_counter(
+                            counter=obj,
+                            labels=payload.service.value,
+                            # log additional_labels=["error_class", "function_name"], used for debugging what's going wrong with the DB
+                            additional_labels=[error_class, function_name],
+                            amount=1,  # LOG ERROR COUNT TO PROMETHEUS
+                        )
+                    else:
+                        self.increment_counter(
+                            counter=obj,
+                            labels=payload.service.value,
+                            amount=1,  # LOG TOTAL REQUESTS TO PROMETHEUS
+                        )
diff --git a/litellm/main.py b/litellm/main.py
index ba6d9a7249..054418e909 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -3274,6 +3274,7 @@ def embedding(  # noqa: PLR0915
            "stream_response": {},
            "cooldown_time": cooldown_time,
        },
+        custom_llm_provider=custom_llm_provider,
    )
    if azure is True or custom_llm_provider == "azure":
        # azure configs
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index 3222e69d86..91dca93acf 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -1,8 +1,9 @@
 model_list:
   - model_name: azure-embedding-model
     litellm_params:
-      model: openai/gpt-3.5-turbo
-      api_key: os.environ/OPENAI_API_KEY
+      model: azure/azure-embedding-model
+      api_key: os.environ/AZURE_API_KEY
+      api_base: os.environ/AZURE_API_BASE
   - model_name: openai-text-completion
     litellm_params:
       model: openai/gpt-3.5-turbo
@@ -17,5 +18,8 @@ model_list:
       model: openai/gpt-3.5-turbo
       api_key: os.environ/OPENAI_API_KEY
 
-# litellm_settings:
-#   callbacks: ["otel"]
\ No newline at end of file
+
+litellm_settings:
+  service_callback: ["prometheus_system"]
+  callbacks: ["prometheus"]
+  cache: true
\ No newline at end of file
diff --git a/tests/local_testing/test_completion_cost.py b/tests/local_testing/test_completion_cost.py
index 6b24e8edbb..585eac0c2a 100644
--- a/tests/local_testing/test_completion_cost.py
+++ b/tests/local_testing/test_completion_cost.py
@@ -2702,3 +2702,30 @@ def test_select_model_name_for_cost_calc():
     return_model = _select_model_name_for_cost_calc(**args)
 
     assert return_model == "azure_ai/mistral-large"
+
+
+def test_cost_calculator_azure_embedding():
+    from litellm.cost_calculator import response_cost_calculator
+    from litellm.types.utils import EmbeddingResponse, Usage
+
+    kwargs = {
+        "response_object": EmbeddingResponse(
+            model="text-embedding-3-small",
+            data=[{"embedding": [1, 2, 3]}],
+            usage=Usage(prompt_tokens=10, completion_tokens=10),
+        ),
+        "model": "text-embedding-3-small",
+        "cache_hit": None,
+        "custom_llm_provider": None,
+        "base_model": "azure/text-embedding-3-small",
+        "call_type": "aembedding",
+        "optional_params": {},
+        "custom_pricing": False,
+        "prompt": "Hello, world!",
+    }
+
+    try:
+        response_cost_calculator(**kwargs)
+    except Exception as e:
+        traceback.print_exc()
+        pytest.fail(f"Error: {e}")
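
Note: the snippet below is a minimal, illustrative sketch of the provider-prefix check introduced in the cost_calculator.py hunk above; it is not part of the patch. It assumes litellm.provider_list is a flat list of provider name strings (as referenced in the diff), and has_provider_prefix is a hypothetical helper written only to demonstrate the behaviour.

import litellm

def has_provider_prefix(model_name: str) -> bool:
    # Hypothetical helper mirroring the new condition: a model name counts as
    # already prefixed if it starts with ANY known provider, not only the
    # current custom_llm_provider.
    return any(model_name.startswith(provider) for provider in litellm.provider_list)

# Expected behaviour (assuming "azure" is present in litellm.provider_list):
print(has_provider_prefix("azure/text-embedding-3-small"))  # True  -> no extra prefix added
print(has_provider_prefix("text-embedding-3-small"))        # False -> caller adds the custom_llm_provider prefix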