diff --git a/litellm/cost_calculator.py b/litellm/cost_calculator.py
index d61e812d0..2504a95f1 100644
--- a/litellm/cost_calculator.py
+++ b/litellm/cost_calculator.py
@@ -101,8 +101,12 @@ def cost_per_token(
     if custom_llm_provider is not None:
         model_with_provider = custom_llm_provider + "/" + model
         if region_name is not None:
-            model_with_provider_and_region = f"{custom_llm_provider}/{region_name}/{model}"
-            if model_with_provider_and_region in model_cost_ref:  # use region based pricing, if it's available
+            model_with_provider_and_region = (
+                f"{custom_llm_provider}/{region_name}/{model}"
+            )
+            if (
+                model_with_provider_and_region in model_cost_ref
+            ):  # use region based pricing, if it's available
                 model_with_provider = model_with_provider_and_region
     else:
         _, custom_llm_provider, _, _ = litellm.get_llm_provider(model=model)
@@ -118,7 +122,9 @@ def cost_per_token(
     Option2. model = "openai/gpt-4"       - model = provider/model
     Option3. model = "anthropic.claude-3" - model = model
     """
-    if model_with_provider in model_cost_ref:  # Option 2. use model with provider, model = "openai/gpt-4"
+    if (
+        model_with_provider in model_cost_ref
+    ):  # Option 2. use model with provider, model = "openai/gpt-4"
         model = model_with_provider
     elif model in model_cost_ref:  # Option 1. use model passed, model="gpt-4"
         model = model
@@ -154,29 +160,45 @@ def cost_per_token(
         )
     elif model in model_cost_ref:
         print_verbose(f"Success: model={model} in model_cost_map")
-        print_verbose(f"prompt_tokens={prompt_tokens}; completion_tokens={completion_tokens}")
+        print_verbose(
+            f"prompt_tokens={prompt_tokens}; completion_tokens={completion_tokens}"
+        )
         if (
             model_cost_ref[model].get("input_cost_per_token", None) is not None
             and model_cost_ref[model].get("output_cost_per_token", None) is not None
         ):
             ## COST PER TOKEN ##
-            prompt_tokens_cost_usd_dollar = model_cost_ref[model]["input_cost_per_token"] * prompt_tokens
-            completion_tokens_cost_usd_dollar = model_cost_ref[model]["output_cost_per_token"] * completion_tokens
-        elif model_cost_ref[model].get("output_cost_per_second", None) is not None and response_time_ms is not None:
+            prompt_tokens_cost_usd_dollar = (
+                model_cost_ref[model]["input_cost_per_token"] * prompt_tokens
+            )
+            completion_tokens_cost_usd_dollar = (
+                model_cost_ref[model]["output_cost_per_token"] * completion_tokens
+            )
+        elif (
+            model_cost_ref[model].get("output_cost_per_second", None) is not None
+            and response_time_ms is not None
+        ):
             print_verbose(
                 f"For model={model} - output_cost_per_second: {model_cost_ref[model].get('output_cost_per_second')}; response time: {response_time_ms}"
             )
             ## COST PER SECOND ##
             prompt_tokens_cost_usd_dollar = 0
             completion_tokens_cost_usd_dollar = (
-                model_cost_ref[model]["output_cost_per_second"] * response_time_ms / 1000
+                model_cost_ref[model]["output_cost_per_second"]
+                * response_time_ms
+                / 1000
             )
-        elif model_cost_ref[model].get("input_cost_per_second", None) is not None and response_time_ms is not None:
+        elif (
+            model_cost_ref[model].get("input_cost_per_second", None) is not None
+            and response_time_ms is not None
+        ):
             print_verbose(
                 f"For model={model} - input_cost_per_second: {model_cost_ref[model].get('input_cost_per_second')}; response time: {response_time_ms}"
             )
             ## COST PER SECOND ##
-            prompt_tokens_cost_usd_dollar = model_cost_ref[model]["input_cost_per_second"] * response_time_ms / 1000
+            prompt_tokens_cost_usd_dollar = (
+                model_cost_ref[model]["input_cost_per_second"] * response_time_ms / 1000
+            )
             completion_tokens_cost_usd_dollar = 0.0
         print_verbose(
             f"Returned custom cost for model={model} - prompt_tokens_cost_usd_dollar: {prompt_tokens_cost_usd_dollar}, completion_tokens_cost_usd_dollar: {completion_tokens_cost_usd_dollar}"
@@ -185,40 +207,57 @@ def cost_per_token(
     elif "ft:gpt-3.5-turbo" in model:
         print_verbose(f"Cost Tracking: {model} is an OpenAI FinteTuned LLM")
         # fuzzy match ft:gpt-3.5-turbo:abcd-id-cool-litellm
-        prompt_tokens_cost_usd_dollar = model_cost_ref["ft:gpt-3.5-turbo"]["input_cost_per_token"] * prompt_tokens
+        prompt_tokens_cost_usd_dollar = (
+            model_cost_ref["ft:gpt-3.5-turbo"]["input_cost_per_token"] * prompt_tokens
+        )
         completion_tokens_cost_usd_dollar = (
-            model_cost_ref["ft:gpt-3.5-turbo"]["output_cost_per_token"] * completion_tokens
+            model_cost_ref["ft:gpt-3.5-turbo"]["output_cost_per_token"]
+            * completion_tokens
         )
         return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
     elif "ft:gpt-4-0613" in model:
         print_verbose(f"Cost Tracking: {model} is an OpenAI FinteTuned LLM")
         # fuzzy match ft:gpt-4-0613:abcd-id-cool-litellm
-        prompt_tokens_cost_usd_dollar = model_cost_ref["ft:gpt-4-0613"]["input_cost_per_token"] * prompt_tokens
-        completion_tokens_cost_usd_dollar = model_cost_ref["ft:gpt-4-0613"]["output_cost_per_token"] * completion_tokens
+        prompt_tokens_cost_usd_dollar = (
+            model_cost_ref["ft:gpt-4-0613"]["input_cost_per_token"] * prompt_tokens
+        )
+        completion_tokens_cost_usd_dollar = (
+            model_cost_ref["ft:gpt-4-0613"]["output_cost_per_token"] * completion_tokens
+        )
         return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
     elif "ft:gpt-4o-2024-05-13" in model:
         print_verbose(f"Cost Tracking: {model} is an OpenAI FinteTuned LLM")
         # fuzzy match ft:gpt-4o-2024-05-13:abcd-id-cool-litellm
-        prompt_tokens_cost_usd_dollar = model_cost_ref["ft:gpt-4o-2024-05-13"]["input_cost_per_token"] * prompt_tokens
+        prompt_tokens_cost_usd_dollar = (
+            model_cost_ref["ft:gpt-4o-2024-05-13"]["input_cost_per_token"]
+            * prompt_tokens
+        )
         completion_tokens_cost_usd_dollar = (
-            model_cost_ref["ft:gpt-4o-2024-05-13"]["output_cost_per_token"] * completion_tokens
+            model_cost_ref["ft:gpt-4o-2024-05-13"]["output_cost_per_token"]
+            * completion_tokens
         )
         return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
 
     elif "ft:davinci-002" in model:
         print_verbose(f"Cost Tracking: {model} is an OpenAI FinteTuned LLM")
         # fuzzy match ft:davinci-002:abcd-id-cool-litellm
-        prompt_tokens_cost_usd_dollar = model_cost_ref["ft:davinci-002"]["input_cost_per_token"] * prompt_tokens
+        prompt_tokens_cost_usd_dollar = (
+            model_cost_ref["ft:davinci-002"]["input_cost_per_token"] * prompt_tokens
+        )
         completion_tokens_cost_usd_dollar = (
-            model_cost_ref["ft:davinci-002"]["output_cost_per_token"] * completion_tokens
+            model_cost_ref["ft:davinci-002"]["output_cost_per_token"]
+            * completion_tokens
         )
         return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
     elif "ft:babbage-002" in model:
         print_verbose(f"Cost Tracking: {model} is an OpenAI FinteTuned LLM")
         # fuzzy match ft:babbage-002:abcd-id-cool-litellm
-        prompt_tokens_cost_usd_dollar = model_cost_ref["ft:babbage-002"]["input_cost_per_token"] * prompt_tokens
+        prompt_tokens_cost_usd_dollar = (
+            model_cost_ref["ft:babbage-002"]["input_cost_per_token"] * prompt_tokens
+        )
         completion_tokens_cost_usd_dollar = (
-            model_cost_ref["ft:babbage-002"]["output_cost_per_token"] * completion_tokens
+            model_cost_ref["ft:babbage-002"]["output_cost_per_token"]
+            * completion_tokens
         )
         return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
     elif model in litellm.azure_llms:
@@ -227,17 +266,25 @@ def cost_per_token(
         verbose_logger.debug(
             f"applying cost={model_cost_ref[model]['input_cost_per_token']} for prompt_tokens={prompt_tokens}"
         )
-        prompt_tokens_cost_usd_dollar = model_cost_ref[model]["input_cost_per_token"] * prompt_tokens
+        prompt_tokens_cost_usd_dollar = (
+            model_cost_ref[model]["input_cost_per_token"] * prompt_tokens
+        )
         verbose_logger.debug(
             f"applying cost={model_cost_ref[model]['output_cost_per_token']} for completion_tokens={completion_tokens}"
         )
-        completion_tokens_cost_usd_dollar = model_cost_ref[model]["output_cost_per_token"] * completion_tokens
+        completion_tokens_cost_usd_dollar = (
+            model_cost_ref[model]["output_cost_per_token"] * completion_tokens
+        )
         return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
     elif model in litellm.azure_embedding_models:
         verbose_logger.debug(f"Cost Tracking: {model} is an Azure Embedding Model")
         model = litellm.azure_embedding_models[model]
-        prompt_tokens_cost_usd_dollar = model_cost_ref[model]["input_cost_per_token"] * prompt_tokens
-        completion_tokens_cost_usd_dollar = model_cost_ref[model]["output_cost_per_token"] * completion_tokens
+        prompt_tokens_cost_usd_dollar = (
+            model_cost_ref[model]["input_cost_per_token"] * prompt_tokens
+        )
+        completion_tokens_cost_usd_dollar = (
+            model_cost_ref[model]["output_cost_per_token"] * completion_tokens
+        )
         return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
     else:
         # if model is not in model_prices_and_context_window.json. Raise an exception-let users know
@@ -261,7 +308,9 @@ def get_model_params_and_category(model_name) -> str:
     import re
 
     model_name = model_name.lower()
-    re_params_match = re.search(r"(\d+b)", model_name)  # catch all decimals like 3b, 70b, etc
+    re_params_match = re.search(
+        r"(\d+b)", model_name
+    )  # catch all decimals like 3b, 70b, etc
     category = None
     if re_params_match is not None:
         params_match = str(re_params_match.group(1))
@@ -292,7 +341,9 @@ def get_model_params_and_category(model_name) -> str:
 def get_replicate_completion_pricing(completion_response=None, total_time=0.0):
     # see https://replicate.com/pricing
     # for all litellm currently supported LLMs, almost all requests go to a100_80gb
-    a100_80gb_price_per_second_public = 0.001400  # assume all calls sent to A100 80GB for now
+    a100_80gb_price_per_second_public = (
+        0.001400  # assume all calls sent to A100 80GB for now
+    )
     if total_time == 0.0:  # total time is in ms
         start_time = completion_response["created"]
         end_time = getattr(completion_response, "ended", time.time())
@@ -377,13 +428,16 @@ def completion_cost(
         prompt_characters = 0
         completion_tokens = 0
         completion_characters = 0
-        custom_llm_provider = None
         if completion_response is not None:
             # get input/output tokens from completion_response
             prompt_tokens = completion_response.get("usage", {}).get("prompt_tokens", 0)
-            completion_tokens = completion_response.get("usage", {}).get("completion_tokens", 0)
+            completion_tokens = completion_response.get("usage", {}).get(
+                "completion_tokens", 0
+            )
             total_time = completion_response.get("_response_ms", 0)
-            verbose_logger.debug(f"completion_response response ms: {completion_response.get('_response_ms')} ")
+            verbose_logger.debug(
+                f"completion_response response ms: {completion_response.get('_response_ms')} "
+            )
             model = model or completion_response.get(
                 "model", None
             )  # check if user passed an override for model, if it's none check completion_response['model']
@@ -393,16 +447,30 @@ def completion_cost(
                     and len(completion_response._hidden_params["model"]) > 0
                 ):
                     model = completion_response._hidden_params.get("model", model)
-                custom_llm_provider = completion_response._hidden_params.get("custom_llm_provider", "")
-                region_name = completion_response._hidden_params.get("region_name", region_name)
-                size = completion_response._hidden_params.get("optional_params", {}).get(
+                custom_llm_provider = completion_response._hidden_params.get(
+                    "custom_llm_provider", ""
+                )
+                region_name = completion_response._hidden_params.get(
+                    "region_name", region_name
+                )
+                size = completion_response._hidden_params.get(
+                    "optional_params", {}
+                ).get(
                     "size", "1024-x-1024"
                 )  # openai default
-                quality = completion_response._hidden_params.get("optional_params", {}).get(
+                quality = completion_response._hidden_params.get(
+                    "optional_params", {}
+                ).get(
                     "quality", "standard"
                 )  # openai default
-                n = completion_response._hidden_params.get("optional_params", {}).get("n", 1)  # openai default
+                n = completion_response._hidden_params.get("optional_params", {}).get(
+                    "n", 1
+                )  # openai default
         else:
+            if model is None:
+                raise ValueError(
+                    f"Model is None and does not exist in passed completion_response. Passed completion_response={completion_response}, model={model}"
+                )
             if len(messages) > 0:
                 prompt_tokens = token_counter(model=model, messages=messages)
             elif len(prompt) > 0:
@@ -413,7 +481,19 @@ def completion_cost(
                 f"Model is None and does not exist in passed completion_response. Passed completion_response={completion_response}, model={model}"
             )
 
-        if call_type == CallTypes.image_generation.value or call_type == CallTypes.aimage_generation.value:
+        if custom_llm_provider is None:
+            try:
+                _, custom_llm_provider, _, _ = litellm.get_llm_provider(model=model)
+            except Exception as e:
+                verbose_logger.error(
+                    "litellm.cost_calculator.py::completion_cost() - Error inferring custom_llm_provider - {}".format(
+                        str(e)
+                    )
+                )
+        if (
+            call_type == CallTypes.image_generation.value
+            or call_type == CallTypes.aimage_generation.value
+        ):
             ### IMAGE GENERATION COST CALCULATION ###
             if custom_llm_provider == "vertex_ai":
                 # https://cloud.google.com/vertex-ai/generative-ai/pricing
@@ -431,23 +511,43 @@ def completion_cost(
             height = int(size[0])  # if it's 1024-x-1024 vs. 1024x1024
             width = int(size[1])
             verbose_logger.debug(f"image_gen_model_name: {image_gen_model_name}")
-            verbose_logger.debug(f"image_gen_model_name_with_quality: {image_gen_model_name_with_quality}")
+            verbose_logger.debug(
+                f"image_gen_model_name_with_quality: {image_gen_model_name_with_quality}"
+            )
             if image_gen_model_name in litellm.model_cost:
-                return litellm.model_cost[image_gen_model_name]["input_cost_per_pixel"] * height * width * n
+                return (
+                    litellm.model_cost[image_gen_model_name]["input_cost_per_pixel"]
+                    * height
+                    * width
+                    * n
+                )
             elif image_gen_model_name_with_quality in litellm.model_cost:
                 return (
-                    litellm.model_cost[image_gen_model_name_with_quality]["input_cost_per_pixel"] * height * width * n
+                    litellm.model_cost[image_gen_model_name_with_quality][
+                        "input_cost_per_pixel"
+                    ]
+                    * height
+                    * width
+                    * n
                 )
             else:
-                raise Exception(f"Model={image_gen_model_name} not found in completion cost model map")
+                raise Exception(
+                    f"Model={image_gen_model_name} not found in completion cost model map"
+                )
         # Calculate cost based on prompt_tokens, completion_tokens
-        if "togethercomputer" in model or "together_ai" in model or custom_llm_provider == "together_ai":
+        if (
+            "togethercomputer" in model
+            or "together_ai" in model
+            or custom_llm_provider == "together_ai"
+        ):
             # together ai prices based on size of llm
             # get_model_params_and_category takes a model name and returns the category of LLM size it is in model_prices_and_context_window.json
             model = get_model_params_and_category(model)
         # replicate llms are calculate based on time for request running
         # see https://replicate.com/pricing
-        elif (model in litellm.replicate_models or "replicate" in model) and model not in litellm.model_cost:
+        elif (
+            model in litellm.replicate_models or "replicate" in model
+        ) and model not in litellm.model_cost:
             # for unmapped replicate model, default to replicate's time tracking logic
             return get_replicate_completion_pricing(completion_response, total_time)
 
@@ -456,23 +556,26 @@ def completion_cost(
                 f"Model is None and does not exist in passed completion_response. Passed completion_response={completion_response}, model={model}"
             )
 
-        if (
-            custom_llm_provider is not None
-            and custom_llm_provider == "vertex_ai"
-            and completion_response is not None
-            and isinstance(completion_response, ModelResponse)
-        ):
+        if custom_llm_provider is not None and custom_llm_provider == "vertex_ai":
             # Calculate the prompt characters + response characters
             if len("messages") > 0:
-                prompt_string = litellm.utils.get_formatted_prompt(data={"messages": messages}, call_type="completion")
+                prompt_string = litellm.utils.get_formatted_prompt(
+                    data={"messages": messages}, call_type="completion"
+                )
             else:
                 prompt_string = ""
 
             prompt_characters = litellm.utils._count_characters(text=prompt_string)
+            if completion_response is not None and isinstance(
+                completion_response, ModelResponse
+            ):
+                completion_string = litellm.utils.get_response_string(
+                    response_obj=completion_response
+                )
 
-            completion_string = litellm.utils.get_response_string(response_obj=completion_response)
-
-            completion_characters = litellm.utils._count_characters(text=completion_string)
+                completion_characters = litellm.utils._count_characters(
+                    text=completion_string
+                )
 
         (
             prompt_tokens_cost_usd_dollar,
@@ -544,7 +647,9 @@ def response_cost_calculator(
                 )
             else:
                 if (
-                    model in litellm.model_cost and custom_pricing is not None and custom_llm_provider is True
+                    model in litellm.model_cost
+                    and custom_pricing is not None
+                    and custom_llm_provider is True
                 ):  # override defaults if custom pricing is set
                     base_model = model
                 # base_model defaults to None if not set on model_info
@@ -556,5 +661,7 @@ def response_cost_calculator(
                 )
         return response_cost
     except litellm.NotFoundError as e:
-        print_verbose(f"Model={model} for LLM Provider={custom_llm_provider} not found in completion cost map.")
+        print_verbose(
+            f"Model={model} for LLM Provider={custom_llm_provider} not found in completion cost map."
+        )
         return None
diff --git a/litellm/llms/azure.py b/litellm/llms/azure.py
index 5d73b9435..fe10cc017 100644
--- a/litellm/llms/azure.py
+++ b/litellm/llms/azure.py
@@ -660,8 +660,16 @@ class AzureChatCompletion(BaseLLM):
             response = await azure_client.chat.completions.create(
                 **data, timeout=timeout
             )
+
+            stringified_response = response.model_dump()
+            logging_obj.post_call(
+                input=data["messages"],
+                api_key=api_key,
+                original_response=stringified_response,
+                additional_args={"complete_input_dict": data},
+            )
             return convert_to_model_response_object(
-                response_object=response.model_dump(),
+                response_object=stringified_response,
                 model_response_object=model_response,
             )
         except AzureOpenAIError as e:
diff --git a/litellm/llms/prompt_templates/factory.py b/litellm/llms/prompt_templates/factory.py
index a97d6812c..b35914584 100644
--- a/litellm/llms/prompt_templates/factory.py
+++ b/litellm/llms/prompt_templates/factory.py
@@ -663,19 +663,23 @@ def convert_url_to_base64(url):
         image_bytes = response.content
         base64_image = base64.b64encode(image_bytes).decode("utf-8")
 
-        img_type = url.split(".")[-1].lower()
-        if img_type == "jpg" or img_type == "jpeg":
-            img_type = "image/jpeg"
-        elif img_type == "png":
-            img_type = "image/png"
-        elif img_type == "gif":
-            img_type = "image/gif"
-        elif img_type == "webp":
-            img_type = "image/webp"
+        image_type = response.headers.get("Content-Type", None)
+        if image_type is not None and image_type.startswith("image/"):
+            img_type = image_type
         else:
-            raise Exception(
-                f"Error: Unsupported image format. Format={img_type}. Supported types = ['image/jpeg', 'image/png', 'image/gif', 'image/webp']"
-            )
+            img_type = url.split(".")[-1].lower()
+            if img_type == "jpg" or img_type == "jpeg":
+                img_type = "image/jpeg"
+            elif img_type == "png":
+                img_type = "image/png"
+            elif img_type == "gif":
+                img_type = "image/gif"
+            elif img_type == "webp":
+                img_type = "image/webp"
+            else:
+                raise Exception(
+                    f"Error: Unsupported image format. Format={img_type}. Supported types = ['image/jpeg', 'image/png', 'image/gif', 'image/webp']"
+                )
 
         return f"data:{img_type};base64,{base64_image}"
     else:
diff --git a/litellm/main.py b/litellm/main.py
index 649581936..318d0b7fe 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -1025,7 +1025,7 @@ def completion(
                 client=client,  # pass AsyncAzureOpenAI, AzureOpenAI client
             )
 
-            if optional_params.get("stream", False) or acompletion == True:
+            if optional_params.get("stream", False):
                 ## LOGGING
                 logging.post_call(
                     input=messages,
diff --git a/litellm/proxy/litellm_pre_call_utils.py b/litellm/proxy/litellm_pre_call_utils.py
index 963cdf027..aec6215ce 100644
--- a/litellm/proxy/litellm_pre_call_utils.py
+++ b/litellm/proxy/litellm_pre_call_utils.py
@@ -175,8 +175,13 @@ async def add_litellm_data_to_request(
 
 
 def _add_otel_traceparent_to_data(data: dict, request: Request):
+    from litellm.proxy.proxy_server import open_telemetry_logger
     if data is None:
         return
+    if open_telemetry_logger is None:
+        # if the user is not using OTEL, don't send extra_headers
+        # relevant issue: https://github.com/BerriAI/litellm/issues/4448
+        return
     if request.headers:
         if "traceparent" in request.headers:
             # we want to forward this to the LLM Provider
diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index 1c10ef461..5138e9b61 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -23,7 +23,7 @@ from litellm import RateLimitError, Timeout, completion, completion_cost, embedd
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
 from litellm.llms.prompt_templates.factory import anthropic_messages_pt
 
-# litellm.num_retries=3
+# litellm.num_retries = 3
 litellm.cache = None
 litellm.success_callback = []
 user_message = "Write a short poem about the sky"
diff --git a/litellm/tests/test_completion_cost.py b/litellm/tests/test_completion_cost.py
index e854345b3..3a65f7294 100644
--- a/litellm/tests/test_completion_cost.py
+++ b/litellm/tests/test_completion_cost.py
@@ -4,7 +4,9 @@ import traceback
 
 import litellm.cost_calculator
 
-sys.path.insert(0, os.path.abspath("../.."))  # Adds the parent directory to the system path
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path
 import asyncio
 import time
 from typing import Optional
@@ -167,11 +169,15 @@ def test_cost_ft_gpt_35():
         input_cost = model_cost["ft:gpt-3.5-turbo"]["input_cost_per_token"]
         output_cost = model_cost["ft:gpt-3.5-turbo"]["output_cost_per_token"]
         print(input_cost, output_cost)
-        expected_cost = (input_cost * resp.usage.prompt_tokens) + (output_cost * resp.usage.completion_tokens)
+        expected_cost = (input_cost * resp.usage.prompt_tokens) + (
+            output_cost * resp.usage.completion_tokens
+        )
         print("\n Excpected cost", expected_cost)
         assert cost == expected_cost
     except Exception as e:
-        pytest.fail(f"Cost Calc failed for ft:gpt-3.5. Expected {expected_cost}, Calculated cost {cost}")
+        pytest.fail(
+            f"Cost Calc failed for ft:gpt-3.5. Expected {expected_cost}, Calculated cost {cost}"
+        )
 
 
 # test_cost_ft_gpt_35()
@@ -200,15 +206,21 @@ def test_cost_azure_gpt_35():
             usage=Usage(prompt_tokens=21, completion_tokens=17, total_tokens=38),
         )
 
-        cost = litellm.completion_cost(completion_response=resp, model="azure/gpt-35-turbo")
+        cost = litellm.completion_cost(
+            completion_response=resp, model="azure/gpt-35-turbo"
+        )
         print("\n Calculated Cost for azure/gpt-3.5-turbo", cost)
         input_cost = model_cost["azure/gpt-35-turbo"]["input_cost_per_token"]
         output_cost = model_cost["azure/gpt-35-turbo"]["output_cost_per_token"]
-        expected_cost = (input_cost * resp.usage.prompt_tokens) + (output_cost * resp.usage.completion_tokens)
+        expected_cost = (input_cost * resp.usage.prompt_tokens) + (
+            output_cost * resp.usage.completion_tokens
+        )
         print("\n Excpected cost", expected_cost)
         assert cost == expected_cost
     except Exception as e:
-        pytest.fail(f"Cost Calc failed for azure/gpt-3.5-turbo. Expected {expected_cost}, Calculated cost {cost}")
+        pytest.fail(
+            f"Cost Calc failed for azure/gpt-3.5-turbo. Expected {expected_cost}, Calculated cost {cost}"
+        )
 
 
 # test_cost_azure_gpt_35()
@@ -239,7 +251,9 @@ def test_cost_azure_embedding():
         assert cost == expected_cost
 
     except Exception as e:
-        pytest.fail(f"Cost Calc failed for azure/gpt-3.5-turbo. Expected {expected_cost}, Calculated cost {cost}")
+        pytest.fail(
+            f"Cost Calc failed for azure/gpt-3.5-turbo. Expected {expected_cost}, Calculated cost {cost}"
+        )
 
 
 # test_cost_azure_embedding()
@@ -315,7 +329,9 @@ def test_cost_bedrock_pricing_actual_calls():
     litellm.set_verbose = True
     model = "anthropic.claude-instant-v1"
     messages = [{"role": "user", "content": "Hey, how's it going?"}]
-    response = litellm.completion(model=model, messages=messages, mock_response="hello cool one")
+    response = litellm.completion(
+        model=model, messages=messages, mock_response="hello cool one"
+    )
 
     print("response", response)
     cost = litellm.completion_cost(
@@ -345,7 +361,8 @@ def test_whisper_openai():
     print(f"cost: {cost}")
     print(f"whisper dict: {litellm.model_cost['whisper-1']}")
     expected_cost = round(
-        litellm.model_cost["whisper-1"]["output_cost_per_second"] * _total_time_in_seconds,
+        litellm.model_cost["whisper-1"]["output_cost_per_second"]
+        * _total_time_in_seconds,
         5,
     )
     assert cost == expected_cost
@@ -365,12 +382,15 @@ def test_whisper_azure():
     _total_time_in_seconds = 3
 
     transcription._response_ms = _total_time_in_seconds * 1000
-    cost = litellm.completion_cost(model="azure/azure-whisper", completion_response=transcription)
+    cost = litellm.completion_cost(
+        model="azure/azure-whisper", completion_response=transcription
+    )
 
     print(f"cost: {cost}")
     print(f"whisper dict: {litellm.model_cost['whisper-1']}")
     expected_cost = round(
-        litellm.model_cost["whisper-1"]["output_cost_per_second"] * _total_time_in_seconds,
+        litellm.model_cost["whisper-1"]["output_cost_per_second"]
+        * _total_time_in_seconds,
         5,
     )
     assert cost == expected_cost
@@ -401,7 +421,9 @@ def test_dalle_3_azure_cost_tracking():
     response.usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
     response._hidden_params = {"model": "dall-e-3", "model_id": None}
     print(f"response hidden params: {response._hidden_params}")
-    cost = litellm.completion_cost(completion_response=response, call_type="image_generation")
+    cost = litellm.completion_cost(
+        completion_response=response, call_type="image_generation"
+    )
     assert cost > 0
 
 
@@ -433,7 +455,9 @@ def test_replicate_llama3_cost_tracking():
         model="replicate/meta/meta-llama-3-8b-instruct",
         object="chat.completion",
         system_fingerprint=None,
-        usage=litellm.utils.Usage(prompt_tokens=48, completion_tokens=31, total_tokens=79),
+        usage=litellm.utils.Usage(
+            prompt_tokens=48, completion_tokens=31, total_tokens=79
+        ),
     )
     cost = litellm.completion_cost(
         completion_response=response,
@@ -443,8 +467,14 @@ def test_replicate_llama3_cost_tracking():
     print(f"cost: {cost}")
     cost = round(cost, 5)
     expected_cost = round(
-        litellm.model_cost["replicate/meta/meta-llama-3-8b-instruct"]["input_cost_per_token"] * 48
-        + litellm.model_cost["replicate/meta/meta-llama-3-8b-instruct"]["output_cost_per_token"] * 31,
+        litellm.model_cost["replicate/meta/meta-llama-3-8b-instruct"][
+            "input_cost_per_token"
+        ]
+        * 48
+        + litellm.model_cost["replicate/meta/meta-llama-3-8b-instruct"][
+            "output_cost_per_token"
+        ]
+        * 31,
         5,
     )
     assert cost == expected_cost
@@ -538,7 +568,9 @@ def test_together_ai_qwen_completion_cost():
         "custom_cost_per_second": None,
     }
 
-    response = litellm.cost_calculator.get_model_params_and_category(model_name="qwen/Qwen2-72B-Instruct")
+    response = litellm.cost_calculator.get_model_params_and_category(
+        model_name="qwen/Qwen2-72B-Instruct"
+    )
 
     assert response == "together-ai-41.1b-80b"
 
@@ -576,8 +608,12 @@ def test_gemini_completion_cost(above_128k, provider):
         ), "model info for model={} does not have pricing for > 128k tokens\nmodel_info={}".format(
             model_name, model_info
         )
-        input_cost = prompt_tokens * model_info["input_cost_per_token_above_128k_tokens"]
-        output_cost = output_tokens * model_info["output_cost_per_token_above_128k_tokens"]
+        input_cost = (
+            prompt_tokens * model_info["input_cost_per_token_above_128k_tokens"]
+        )
+        output_cost = (
+            output_tokens * model_info["output_cost_per_token_above_128k_tokens"]
+        )
     else:
         input_cost = prompt_tokens * model_info["input_cost_per_token"]
         output_cost = output_tokens * model_info["output_cost_per_token"]
@@ -674,3 +710,11 @@ def test_vertex_ai_claude_completion_cost():
     )
     predicted_cost = input_tokens * 0.000003 + 0.000015 * output_tokens
     assert cost == predicted_cost
+
+
+def test_vertex_ai_gemini_predict_cost():
+    model = "gemini-1.5-flash"
+    messages = [{"role": "user", "content": "Hey, hows it going???"}]
+    predictive_cost = completion_cost(model=model, messages=messages)
+
+    assert predictive_cost > 0
diff --git a/litellm/tests/test_prompt_factory.py b/litellm/tests/test_prompt_factory.py
index b3aafab6e..5a368f92d 100644
--- a/litellm/tests/test_prompt_factory.py
+++ b/litellm/tests/test_prompt_factory.py
@@ -1,7 +1,8 @@
 #### What this tests ####
 #    This tests if prompts are being correctly formatted
-import sys
 import os
+import sys
+
 import pytest
 
 sys.path.insert(0, os.path.abspath("../.."))
@@ -10,12 +11,13 @@ sys.path.insert(0, os.path.abspath("../.."))
 import litellm
 from litellm import completion
 from litellm.llms.prompt_templates.factory import (
-    anthropic_pt,
+    _bedrock_tools_pt,
     anthropic_messages_pt,
+    anthropic_pt,
     claude_2_1_pt,
+    convert_url_to_base64,
     llama_2_chat_pt,
     prompt_factory,
-    _bedrock_tools_pt,
 )
 
 
@@ -153,3 +155,11 @@ def test_bedrock_tool_calling_pt():
     converted_tools = _bedrock_tools_pt(tools=tools)
 
     print(converted_tools)
+
+
+def test_convert_url_to_img():
+    response_url = convert_url_to_base64(
+        url="https://images.pexels.com/photos/1319515/pexels-photo-1319515.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1"
+    )
+
+    assert "image/jpeg" in response_url