diff --git a/litellm/cost_calculator.py b/litellm/cost_calculator.py
index 9c980aa3a..789811b46 100644
--- a/litellm/cost_calculator.py
+++ b/litellm/cost_calculator.py
@@ -6,6 +6,9 @@ from typing import List, Literal, Optional, Tuple, Union
 import litellm
 import litellm._logging
 from litellm import verbose_logger
+from litellm.litellm_core_utils.llm_cost_calc.google import (
+    cost_per_character as google_cost_per_character,
+)
 from litellm.litellm_core_utils.llm_cost_calc.google import (
     cost_per_token as google_cost_per_token,
 )
@@ -23,8 +26,8 @@ from litellm.utils import (
 
 
 def _cost_per_token_custom_pricing_helper(
-    prompt_tokens=0,
-    completion_tokens=0,
+    prompt_tokens: float = 0,
+    completion_tokens: float = 0,
     response_time_ms=None,
     ### CUSTOM PRICING ###
     custom_cost_per_token: Optional[CostPerToken] = None,
@@ -52,6 +55,9 @@ def cost_per_token(
     response_time_ms=None,
     custom_llm_provider: Optional[str] = None,
     region_name=None,
+    ### CHARACTER PRICING ###
+    prompt_characters: float = 0,
+    completion_characters: float = 0,
     ### CUSTOM PRICING ###
     custom_cost_per_token: Optional[CostPerToken] = None,
     custom_cost_per_second: Optional[float] = None,
@@ -64,6 +70,8 @@ def cost_per_token(
         prompt_tokens (int): The number of tokens in the prompt.
         completion_tokens (int): The number of tokens in the completion.
         response_time (float): The amount of time, in milliseconds, it took the call to complete.
+        prompt_characters (float): The number of characters in the prompt. Used for vertex ai cost calculation.
+        completion_characters (float): The number of characters in the completion response. Used for vertex ai cost calculation.
         custom_llm_provider (str): The llm provider to whom the call was made (see init.py for full list)
         custom_cost_per_token: Optional[CostPerToken]: the cost per input + output token for the llm api call.
         custom_cost_per_second: Optional[float]: the cost per second for the llm api call.
@@ -127,7 +135,16 @@ def cost_per_token(
         # see this https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models
         print_verbose(f"Looking up model={model} in model_cost_map")
 
-    if custom_llm_provider == "vertex_ai" or custom_llm_provider == "gemini":
+    if custom_llm_provider == "vertex_ai":
+        return google_cost_per_character(
+            model=model_without_prefix,
+            custom_llm_provider=custom_llm_provider,
+            prompt_characters=prompt_characters,
+            completion_characters=completion_characters,
+            prompt_tokens=prompt_tokens,
+            completion_tokens=completion_tokens,
+        )
+    elif custom_llm_provider == "gemini":
         return google_cost_per_token(
             model=model_without_prefix,
             custom_llm_provider=custom_llm_provider,
@@ -378,7 +395,9 @@ def completion_cost(
             model = "dall-e-2"  # for dall-e-2, azure expects an empty model name
         # Handle Inputs to completion_cost
         prompt_tokens = 0
+        prompt_characters = 0
         completion_tokens = 0
+        completion_characters = 0
         custom_llm_provider = None
         if completion_response is not None:
             # get input/output tokens from completion_response
@@ -495,6 +514,30 @@ def completion_cost(
                     f"Model is None and does not exist in passed completion_response. Passed completion_response={completion_response}, model={model}"
                 )
+
+        if (
+            custom_llm_provider is not None
+            and custom_llm_provider == "vertex_ai"
+            and completion_response is not None
+            and isinstance(completion_response, ModelResponse)
+        ):
+            # Calculate the prompt characters + response characters
+            if len(messages) > 0:
+                prompt_string = litellm.utils.get_formatted_prompt(
+                    data={"messages": messages}, call_type="completion"
+                )
+            else:
+                prompt_string = ""
+
+            prompt_characters = litellm.utils._count_characters(text=prompt_string)
+
+            completion_string = litellm.utils.get_response_string(
+                response_obj=completion_response
+            )
+
+            completion_characters = litellm.utils._count_characters(
+                text=completion_string
+            )
+
         (
             prompt_tokens_cost_usd_dollar,
             completion_tokens_cost_usd_dollar,
         ) = cost_per_token(
@@ -507,6 +550,8 @@ def completion_cost(
             region_name=region_name,
             custom_cost_per_second=custom_cost_per_second,
             custom_cost_per_token=custom_cost_per_token,
+            prompt_characters=prompt_characters,
+            completion_characters=completion_characters,
         )
         _final_cost = prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
         print_verbose(
diff --git a/litellm/litellm_core_utils/llm_cost_calc/google.py b/litellm/litellm_core_utils/llm_cost_calc/google.py
index 747860070..2c958cf88 100644
--- a/litellm/litellm_core_utils/llm_cost_calc/google.py
+++ b/litellm/litellm_core_utils/llm_cost_calc/google.py
@@ -1,8 +1,10 @@
 # What is this?
 ## Cost calculation for Google AI Studio / Vertex AI models
-from typing import Literal, Tuple
+import traceback
+from typing import List, Literal, Optional, Tuple
 
 import litellm
+from litellm import verbose_logger
 
 """
 Gemini pricing covers:
@@ -12,6 +14,12 @@ Gemini pricing covers:
 - video
 """
 
+"""
+Vertex AI -> character based pricing
+
+Google AI Studio -> token based pricing
+"""
+
 models_without_dynamic_pricing = ["gemini-1.0-pro", "gemini-pro"]
 
 
@@ -21,6 +29,124 @@ def _is_above_128k(tokens: float) -> bool:
     return False
 
 
+def cost_per_character(
+    model: str,
+    custom_llm_provider: str,
+    prompt_tokens: float,
+    completion_tokens: float,
+    prompt_characters: float,
+    completion_characters: float,
+) -> Tuple[float, float]:
+    """
+    Calculates the cost per character for a given VertexAI model, input messages, and response object.
+
+    Input:
+        - model: str, the model name without provider prefix
+        - custom_llm_provider: str, "vertex_ai-*"
+        - prompt_characters: float, the number of input characters
+        - completion_characters: float, the number of output characters
+
+    Returns:
+        Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd
+
+    Raises:
+        Exception if model requires >128k pricing, but model cost not mapped
+    """
+    model_info = litellm.get_model_info(
+        model=model, custom_llm_provider=custom_llm_provider
+    )
+
+    ## GET MODEL INFO
+    model_info = litellm.get_model_info(
+        model=model, custom_llm_provider=custom_llm_provider
+    )
+
+    ## CALCULATE INPUT COST
+    try:
+        if (
+            _is_above_128k(tokens=prompt_characters * 4)  # 1 token = 4 char
+            and model not in models_without_dynamic_pricing
+        ):
+            ## check if character pricing, else default to token pricing
+            assert (
+                "input_cost_per_character_above_128k_tokens" in model_info
+                and model_info["input_cost_per_character_above_128k_tokens"] is not None
+            ), "model info for model={} does not have 'input_cost_per_character_above_128k_tokens'-pricing for > 128k tokens\nmodel_info={}".format(
+                model, model_info
+            )
+            prompt_cost = (
+                prompt_characters
+                * model_info["input_cost_per_character_above_128k_tokens"]
+            )
+        else:
+            assert (
+                "input_cost_per_character" in model_info
+                and model_info["input_cost_per_character"] is not None
+            ), "model info for model={} does not have 'input_cost_per_character'-pricing\nmodel_info={}".format(
+                model, model_info
+            )
+            prompt_cost = prompt_characters * model_info["input_cost_per_character"]
+    except Exception as e:
+        verbose_logger.error(
+            "litellm.litellm_core_utils.llm_cost_calc.google.cost_per_character(): Exception occurred - {}\n{}\n\
+                Defaulting to (cost_per_token * 4) calculation for prompt_cost".format(
+                str(e), traceback.format_exc()
+            )
+        )
+        initial_prompt_cost, _ = cost_per_token(
+            model=model,
+            custom_llm_provider=custom_llm_provider,
+            prompt_tokens=prompt_tokens,
+            completion_tokens=completion_tokens,
+        )
+
+        prompt_cost = initial_prompt_cost * 4
+
+    ## CALCULATE OUTPUT COST
+    try:
+        if (
+            _is_above_128k(tokens=completion_characters * 4)  # 1 token = 4 char
+            and model not in models_without_dynamic_pricing
+        ):
+            assert (
+                "output_cost_per_character_above_128k_tokens" in model_info
+                and model_info["output_cost_per_character_above_128k_tokens"]
+                is not None
+            ), "model info for model={} does not have 'output_cost_per_character_above_128k_tokens' pricing\nmodel_info={}".format(
+                model, model_info
+            )
+            completion_cost = (
+                completion_characters
+                * model_info["output_cost_per_character_above_128k_tokens"]
+            )
+        else:
+            assert (
+                "output_cost_per_character" in model_info
+                and model_info["output_cost_per_character"] is not None
+            ), "model info for model={} does not have 'output_cost_per_character'-pricing\nmodel_info={}".format(
+                model, model_info
+            )
+            completion_cost = (
+                completion_characters * model_info["output_cost_per_character"]
+            )
+    except Exception as e:
+        verbose_logger.error(
+            "litellm.litellm_core_utils.llm_cost_calc.google.cost_per_character(): Exception occurred - {}\n{}\n\
+                Defaulting to (cost_per_token * 4) calculation for completion_cost".format(
+                str(e), traceback.format_exc()
+            )
+        )
+        _, initial_completion_cost = cost_per_token(
+            model=model,
+            custom_llm_provider=custom_llm_provider,
+            prompt_tokens=prompt_tokens,
+            completion_tokens=completion_tokens,
+        )
+
+        completion_cost = initial_completion_cost * 4
+    return prompt_cost, completion_cost
+
+
 def cost_per_token(
     model: str,
     custom_llm_provider: str,
@@ -53,7 +179,8 @@ def cost_per_token(
         and model not in models_without_dynamic_pricing
     ):
         assert (
-            model_info["input_cost_per_token_above_128k_tokens"] is not None
+            "input_cost_per_token_above_128k_tokens" in model_info
+            and model_info["input_cost_per_token_above_128k_tokens"] is not None
         ), "model info for model={} does not have pricing for > 128k tokens\nmodel_info={}".format(
             model, model_info
         )
@@ -69,7 +196,8 @@ def cost_per_token(
         and model not in models_without_dynamic_pricing
     ):
         assert (
-            model_info["output_cost_per_token_above_128k_tokens"] is not None
+            "output_cost_per_token_above_128k_tokens" in model_info
+            and model_info["output_cost_per_token_above_128k_tokens"] is not None
         ), "model info for model={} does not have pricing for > 128k tokens\nmodel_info={}".format(
             model, model_info
         )
diff --git a/litellm/tests/test_completion_cost.py b/litellm/tests/test_completion_cost.py
index b7c85679d..42f59c5f5 100644
--- a/litellm/tests/test_completion_cost.py
+++ b/litellm/tests/test_completion_cost.py
@@ -576,7 +576,7 @@ def test_together_ai_qwen_completion_cost():
 
 
 @pytest.mark.parametrize("above_128k", [False, True])
-@pytest.mark.parametrize("provider", ["vertex_ai", "gemini"])
+@pytest.mark.parametrize("provider", ["gemini"])
 def test_gemini_completion_cost(above_128k, provider):
     """
     Check if cost correctly calculated for gemini models based on context window
@@ -628,3 +628,35 @@ def test_gemini_completion_cost(above_128k, provider):
 
     assert calculated_input_cost == input_cost
     assert calculated_output_cost == output_cost
+
+
+def _count_characters(text):
+    # Remove white spaces and count characters
+    filtered_text = "".join(char for char in text if not char.isspace())
+    return len(filtered_text)
+
+
+def test_vertex_ai_completion_cost():
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
+
+    text = "The quick brown fox jumps over the lazy dog."
+    characters = _count_characters(text=text)
+
+    model_info = litellm.get_model_info(model="gemini-1.5-flash")
+
+    print("\nExpected model info:\n{}\n\n".format(model_info))
+
+    expected_input_cost = characters * model_info["input_cost_per_character"]
+
+    ## CALCULATED COST
+    calculated_input_cost, calculated_output_cost = cost_per_token(
+        model="gemini-1.5-flash",
+        custom_llm_provider="vertex_ai",
+        prompt_characters=characters,
+        completion_characters=0,
+    )
+
+    assert round(expected_input_cost, 6) == round(calculated_input_cost, 6)
+    print("expected_input_cost: {}".format(expected_input_cost))
+    print("calculated_input_cost: {}".format(calculated_input_cost))
diff --git a/litellm/types/utils.py b/litellm/types/utils.py
index 142eef300..da6da4dc9 100644
--- a/litellm/types/utils.py
+++ b/litellm/types/utils.py
@@ -44,15 +44,25 @@ class ModelInfo(TypedDict, total=False):
     max_input_tokens: Required[Optional[int]]
     max_output_tokens: Required[Optional[int]]
     input_cost_per_token: Required[float]
-    input_cost_per_token_above_128k_tokens: Optional[float]
-    input_cost_per_image: Optional[float]
-    input_cost_per_audio_per_second: Optional[float]
-    input_cost_per_video_per_second: Optional[float]
+    input_cost_per_character: Optional[float]  # only for vertex ai models
+    input_cost_per_token_above_128k_tokens: Optional[float]  # only for vertex ai models
+    input_cost_per_character_above_128k_tokens: Optional[
+        float
+    ]  # only for vertex ai models
+    input_cost_per_image: Optional[float]  # only for vertex ai models
+    input_cost_per_audio_per_second: Optional[float]  # only for vertex ai models
+    input_cost_per_video_per_second: Optional[float]  # only for vertex ai models
     output_cost_per_token: Required[float]
-    output_cost_per_token_above_128k_tokens: Optional[float]
+    output_cost_per_character: Optional[float]  # only for vertex ai models
+    output_cost_per_token_above_128k_tokens: Optional[
+        float
+    ]  # only for vertex ai models
+    output_cost_per_character_above_128k_tokens: Optional[
+        float
+    ]  # only for vertex ai models
     output_cost_per_image: Optional[float]
-    output_cost_per_video_per_second: Optional[float]
-    output_cost_per_audio_per_second: Optional[float]
+    output_cost_per_video_per_second: Optional[float]  # only for vertex ai models
+    output_cost_per_audio_per_second: Optional[float]  # only for vertex ai models
     litellm_provider: Required[str]
     mode: Required[
         Literal[
diff --git a/litellm/utils.py b/litellm/utils.py
index 0623e26b3..2cd19be54 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -3810,6 +3810,12 @@ def get_supported_openai_params(
         return None
 
 
+def _count_characters(text: str) -> int:
+    # Remove white spaces and count characters
+    filtered_text = "".join(char for char in text if not char.isspace())
+    return len(filtered_text)
+
+
 def get_formatted_prompt(
     data: dict,
     call_type: Literal[
@@ -3828,9 +3834,20 @@ def get_formatted_prompt(
     """
     prompt = ""
     if call_type == "completion":
-        for m in data["messages"]:
-            if "content" in m and isinstance(m["content"], str):
-                prompt += m["content"]
+        for message in data["messages"]:
+            if message.get("content", None) is not None:
+                content = message.get("content")
+                if isinstance(content, str):
+                    prompt += message["content"]
+                elif isinstance(content, List):
+                    for c in content:
+                        if c["type"] == "text":
+                            prompt += c["text"]
+            if "tool_calls" in message:
+                for tool_call in message["tool_calls"]:
+                    if "function" in tool_call:
+                        function_arguments = tool_call["function"]["arguments"]
+                        prompt += function_arguments
     elif call_type == "text_completion":
         prompt = data["prompt"]
     elif call_type == "embedding" or call_type == "moderation":
@@ -3847,6 +3864,17 @@ def get_formatted_prompt(
     return prompt
 
 
+def get_response_string(response_obj: ModelResponse) -> str:
+    _choices: List[Choices] = response_obj.choices  # type: ignore
+
+    response_str = ""
+    for choice in _choices:
+        if choice.message.content is not None:
+            response_str += choice.message.content
+
+    return response_str
+
+
 def _is_non_openai_azure_model(model: str) -> bool:
     try:
         model_name = model.split("/", 1)[1]
@@ -4392,13 +4420,22 @@ def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> Mod
                 max_input_tokens=_model_info.get("max_input_tokens", None),
                 max_output_tokens=_model_info.get("max_output_tokens", None),
                 input_cost_per_token=_model_info.get("input_cost_per_token", 0),
+                input_cost_per_character=_model_info.get(
+                    "input_cost_per_character", None
+                ),
                 input_cost_per_token_above_128k_tokens=_model_info.get(
                     "input_cost_per_token_above_128k_tokens", None
                 ),
                 output_cost_per_token=_model_info.get("output_cost_per_token", 0),
+                output_cost_per_character=_model_info.get(
+                    "output_cost_per_character", None
+                ),
                 output_cost_per_token_above_128k_tokens=_model_info.get(
                     "output_cost_per_token_above_128k_tokens", None
                 ),
+                output_cost_per_character_above_128k_tokens=_model_info.get(
+                    "output_cost_per_character_above_128k_tokens", None
+                ),
                 litellm_provider=_model_info.get(
                     "litellm_provider", custom_llm_provider
                 ),
@@ -4426,13 +4463,22 @@ def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> Mod
                 max_input_tokens=_model_info.get("max_input_tokens", None),
                 max_output_tokens=_model_info.get("max_output_tokens", None),
                 input_cost_per_token=_model_info.get("input_cost_per_token", 0),
+                input_cost_per_character=_model_info.get(
+                    "input_cost_per_character", None
+                ),
                 input_cost_per_token_above_128k_tokens=_model_info.get(
                     "input_cost_per_token_above_128k_tokens", None
                 ),
                 output_cost_per_token=_model_info.get("output_cost_per_token", 0),
+                output_cost_per_character=_model_info.get(
+                    "output_cost_per_character", None
+                ),
                 output_cost_per_token_above_128k_tokens=_model_info.get(
                     "output_cost_per_token_above_128k_tokens", None
                 ),
+                output_cost_per_character_above_128k_tokens=_model_info.get(
+                    "output_cost_per_character_above_128k_tokens", None
+                ),
                 litellm_provider=_model_info.get(
                     "litellm_provider", custom_llm_provider
                 ),
@@ -4460,13 +4506,22 @@ def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> Mod
                 max_input_tokens=_model_info.get("max_input_tokens", None),
                 max_output_tokens=_model_info.get("max_output_tokens", None),
                 input_cost_per_token=_model_info.get("input_cost_per_token", 0),
+                input_cost_per_character=_model_info.get(
+                    "input_cost_per_character", None
+                ),
                 input_cost_per_token_above_128k_tokens=_model_info.get(
                     "input_cost_per_token_above_128k_tokens", None
                 ),
                 output_cost_per_token=_model_info.get("output_cost_per_token", 0),
+                output_cost_per_character=_model_info.get(
+                    "output_cost_per_character", None
+                ),
                 output_cost_per_token_above_128k_tokens=_model_info.get(
                     "output_cost_per_token_above_128k_tokens", None
                 ),
+                output_cost_per_character_above_128k_tokens=_model_info.get(
+                    "output_cost_per_character_above_128k_tokens", None
+                ),
                 litellm_provider=_model_info.get(
                     "litellm_provider", custom_llm_provider
                 ),