diff --git a/litellm/cost_calculator.py b/litellm/cost_calculator.py
index 797606121..aa2d3ad76 100644
--- a/litellm/cost_calculator.py
+++ b/litellm/cost_calculator.py
@@ -34,6 +34,7 @@ from litellm.llms.databricks.cost_calculator import (
 from litellm.llms.fireworks_ai.cost_calculator import (
     cost_per_token as fireworks_ai_cost_per_token,
 )
+from litellm.llms.OpenAI.cost_calculation import cost_per_token as openai_cost_per_token
 from litellm.llms.together_ai.cost_calculator import get_model_params_and_category
 from litellm.rerank_api.types import RerankResponse
 from litellm.types.llms.openai import HttpxBinaryResponseContent
@@ -55,7 +56,7 @@ from litellm.utils import (
 def _cost_per_token_custom_pricing_helper(
     prompt_tokens: float = 0,
     completion_tokens: float = 0,
-    response_time_ms=None,
+    response_time_ms: Optional[float] = 0.0,
     ### CUSTOM PRICING ###
     custom_cost_per_token: Optional[CostPerToken] = None,
     custom_cost_per_second: Optional[float] = None,
@@ -79,7 +80,7 @@ def cost_per_token(
     model: str = "",
     prompt_tokens: int = 0,
     completion_tokens: int = 0,
-    response_time_ms=None,
+    response_time_ms: Optional[float] = 0.0,
     custom_llm_provider: Optional[str] = None,
     region_name=None,
     ### CHARACTER PRICING ###
@@ -198,7 +199,33 @@ def cost_per_token(
     # see this https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models
     print_verbose(f"Looking up model={model} in model_cost_map")
 
-    if custom_llm_provider == "vertex_ai":
+    if call_type == "speech" or call_type == "aspeech":
+        prompt_cost, completion_cost = _generic_cost_per_character(
+            model=model_without_prefix,
+            custom_llm_provider=custom_llm_provider,
+            prompt_characters=prompt_characters,
+            completion_characters=completion_characters,
+            custom_prompt_cost=None,
+            custom_completion_cost=0,
+        )
+        if prompt_cost is None or completion_cost is None:
+            raise ValueError(
+                "cost for tts call is None. prompt_cost={}, completion_cost={}, model={}, custom_llm_provider={}, prompt_characters={}, completion_characters={}".format(
+                    prompt_cost,
+                    completion_cost,
+                    model_without_prefix,
+                    custom_llm_provider,
+                    prompt_characters,
+                    completion_characters,
+                )
+            )
+        return prompt_cost, completion_cost
+    elif call_type == "arerank" or call_type == "rerank":
+        return rerank_cost(
+            model=model,
+            custom_llm_provider=custom_llm_provider,
+        )
+    elif custom_llm_provider == "vertex_ai":
         cost_router = google_cost_router(
             model=model_without_prefix,
             custom_llm_provider=custom_llm_provider,
@@ -226,6 +253,10 @@ def cost_per_token(
         )
     elif custom_llm_provider == "anthropic":
         return anthropic_cost_per_token(model=model, usage=usage_block)
+    elif custom_llm_provider == "openai":
+        return openai_cost_per_token(
+            model=model, usage=usage_block, response_time_ms=response_time_ms
+        )
     elif custom_llm_provider == "databricks":
         return databricks_cost_per_token(model=model, usage=usage_block)
     elif custom_llm_provider == "fireworks_ai":
@@ -237,32 +268,6 @@ def cost_per_token(
             prompt_tokens=prompt_tokens,
             completion_tokens=completion_tokens,
         )
-    elif call_type == "speech" or call_type == "aspeech":
-        prompt_cost, completion_cost = _generic_cost_per_character(
-            model=model_without_prefix,
-            custom_llm_provider=custom_llm_provider,
-            prompt_characters=prompt_characters,
-            completion_characters=completion_characters,
-            custom_prompt_cost=None,
-            custom_completion_cost=0,
-        )
-        if prompt_cost is None or completion_cost is None:
-            raise ValueError(
-                "cost for tts call is None. prompt_cost={}, completion_cost={}, model={}, custom_llm_provider={}, prompt_characters={}, completion_characters={}".format(
-                    prompt_cost,
-                    completion_cost,
-                    model_without_prefix,
-                    custom_llm_provider,
-                    prompt_characters,
-                    completion_characters,
-                )
-            )
-        return prompt_cost, completion_cost
-    elif call_type == "arerank" or call_type == "rerank":
-        return rerank_cost(
-            model=model,
-            custom_llm_provider=custom_llm_provider,
-        )
     elif model in model_cost_ref:
         print_verbose(f"Success: model={model} in model_cost_map")
         print_verbose(
@@ -461,7 +466,7 @@ def completion_cost(
     prompt="",
     messages: List = [],
     completion="",
-    total_time=0.0,  # used for replicate, sagemaker
+    total_time: Optional[float] = 0.0,  # used for replicate, sagemaker
     call_type: Literal[
         "embedding",
         "aembedding",
@@ -501,7 +506,7 @@ def completion_cost(
         model (str): Optional. The name of the language model used in the completion calls
         prompt (str): Optional. The input prompt passed to the llm
         completion (str): Optional. The output completion text from the llm
-        total_time (float): Optional. (Only used for Replicate LLMs) The total time used for the request in seconds
+        total_time (float, int): Optional. (Only used for Replicate LLMs) The total time used for the request in seconds
         custom_cost_per_token: Optional[CostPerToken]: the cost per input + output token for the llm api call.
         custom_cost_per_second: Optional[float]: the cost per second for the llm api call.
@@ -562,6 +567,13 @@ def completion_cost(
             completion_tokens = _usage.get("completion_tokens", 0)
             cache_creation_input_tokens = _usage.get("cache_creation_input_tokens", 0)
             cache_read_input_tokens = _usage.get("cache_read_input_tokens", 0)
+            if (
+                "prompt_tokens_details" in _usage
+                and _usage["prompt_tokens_details"] != {}
+                and _usage["prompt_tokens_details"]
+            ):
+                prompt_tokens_details = _usage.get("prompt_tokens_details", {})
+                cache_read_input_tokens = prompt_tokens_details.get("cached_tokens", 0)
 
             total_time = getattr(completion_response, "_response_ms", 0)
             verbose_logger.debug(
diff --git a/litellm/llms/OpenAI/cost_calculation.py b/litellm/llms/OpenAI/cost_calculation.py
new file mode 100644
index 000000000..6b8e5f41e
--- /dev/null
+++ b/litellm/llms/OpenAI/cost_calculation.py
@@ -0,0 +1,68 @@
+"""
+Helper util for handling openai-specific cost calculation
+- e.g.: prompt caching
+"""
+
+from typing import Optional, Tuple
+
+from litellm._logging import verbose_logger
+from litellm.types.utils import Usage
+from litellm.utils import get_model_info
+
+
+def cost_per_token(
+    model: str, usage: Usage, response_time_ms: Optional[float] = 0.0
+) -> Tuple[float, float]:
+    """
+    Calculates the cost per token for a given model, prompt tokens, and completion tokens.
+
+    Input:
+    - model: str, the model name without provider prefix
+    - usage: LiteLLM Usage block, containing anthropic caching information
+
+    Returns:
+        Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd
+    """
+    ## GET MODEL INFO
+    model_info = get_model_info(model=model, custom_llm_provider="openai")
+
+    ## CALCULATE INPUT COST
+    prompt_cost: float = usage["prompt_tokens"] * model_info["input_cost_per_token"]
+
+    ## CALCULATE OUTPUT COST
+    completion_cost: float = (
+        usage["completion_tokens"] * model_info["output_cost_per_token"]
+    )
+
+    ## Prompt Caching cost calculation
+    if model_info.get("cache_read_input_token_cost") is not None:
+        # Note: We read ._cache_read_input_tokens from the Usage - since cost_calculator.py standardizes the cache read tokens on usage._cache_read_input_tokens
+        prompt_cost += usage._cache_read_input_tokens * (
+            model_info.get("cache_read_input_token_cost", 0) or 0
+        )
+
+    ## Speech / Audio cost calculation
+    if (
+        "output_cost_per_second" in model_info
+        and model_info["output_cost_per_second"] is not None
+        and response_time_ms is not None
+    ):
+        verbose_logger.debug(
+            f"For model={model} - output_cost_per_second: {model_info.get('output_cost_per_second')}; response time: {response_time_ms}"
+        )
+        ## COST PER SECOND ##
+        prompt_cost = 0
+        completion_cost = model_info["output_cost_per_second"] * response_time_ms / 1000
+    elif (
+        "input_cost_per_second" in model_info
+        and model_info["input_cost_per_second"] is not None
+        and response_time_ms is not None
+    ):
+        verbose_logger.debug(
+            f"For model={model} - input_cost_per_second: {model_info.get('input_cost_per_second')}; response time: {response_time_ms}"
+        )
+        ## COST PER SECOND ##
+        prompt_cost = model_info["input_cost_per_second"] * response_time_ms / 1000
+        completion_cost = 0.0
+
+    return prompt_cost, completion_cost
diff --git a/litellm/types/utils.py b/litellm/types/utils.py
index 35d48d9ff..4cabf6834 100644
--- a/litellm/types/utils.py
+++ b/litellm/types/utils.py
@@ -79,6 +79,8 @@ class ModelInfo(TypedDict, total=False):
     output_vector_size: Optional[int]
     output_cost_per_video_per_second: Optional[float]  # only for vertex ai models
     output_cost_per_audio_per_second: Optional[float]  # only for vertex ai models
+    output_cost_per_second: Optional[float]  # for OpenAI Speech models
+
     litellm_provider: Required[str]
     mode: Required[
         Literal[
diff --git a/litellm/utils.py b/litellm/utils.py
index e8dea5759..57166fd2a 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -5092,6 +5092,7 @@ def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> Mod
             output_cost_per_character_above_128k_tokens=_model_info.get(
                 "output_cost_per_character_above_128k_tokens", None
             ),
+            output_cost_per_second=_model_info.get("output_cost_per_second", None),
             output_vector_size=_model_info.get("output_vector_size", None),
             litellm_provider=_model_info.get(
                 "litellm_provider", custom_llm_provider
diff --git a/tests/local_testing/test_completion_cost.py b/tests/local_testing/test_completion_cost.py
index 8408a5051..02c87ed04 100644
--- a/tests/local_testing/test_completion_cost.py
+++ b/tests/local_testing/test_completion_cost.py
@@ -24,6 +24,7 @@ from litellm import (
     model_cost,
     open_ai_chat_completion_models,
 )
+from litellm.types.utils import PromptTokensDetails
 from litellm.litellm_core_utils.litellm_logging import CustomLogger
 
 
@@ -209,7 +210,9 @@ def test_cost_ft_gpt_35():
         usage=Usage(prompt_tokens=21, completion_tokens=17, total_tokens=38),
     )
 
-    cost = litellm.completion_cost(completion_response=resp)
+    cost = litellm.completion_cost(
+        completion_response=resp, custom_llm_provider="openai"
+    )
     print("\n Calculated Cost for ft:gpt-3.5", cost)
     input_cost = model_cost["ft:gpt-3.5-turbo"]["input_cost_per_token"]
     output_cost = model_cost["ft:gpt-3.5-turbo"]["output_cost_per_token"]
@@ -1330,6 +1333,90 @@ def test_completion_cost_vertex_llama3():
     assert cost == 0
 
 
+def test_cost_openai_prompt_caching():
+    from litellm.utils import Choices, Message, ModelResponse, Usage
+    from litellm import get_model_info
+
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
+
+    model = "gpt-4o-mini-2024-07-18"
+
+    ## LLM API CALL ## (MORE EXPENSIVE)
+    response_1 = ModelResponse(
+        id="chatcmpl-3f427194-0840-4d08-b571-56bfe38a5424",
+        choices=[
+            Choices(
+                finish_reason="length",
+                index=0,
+                message=Message(
+                    content="Hello! I'm doing well, thank you for",
+                    role="assistant",
+                    tool_calls=None,
+                    function_call=None,
+                ),
+            )
+        ],
+        created=1725036547,
+        model=model,
+        object="chat.completion",
+        system_fingerprint=None,
+        usage=Usage(
+            completion_tokens=10,
+            prompt_tokens=14,
+            total_tokens=24,
+        ),
+    )
+
+    ## PROMPT CACHE HIT ## (LESS EXPENSIVE)
+    response_2 = ModelResponse(
+        id="chatcmpl-3f427194-0840-4d08-b571-56bfe38a5424",
+        choices=[
+            Choices(
+                finish_reason="length",
+                index=0,
+                message=Message(
+                    content="Hello! I'm doing well, thank you for",
+                    role="assistant",
+                    tool_calls=None,
+                    function_call=None,
+                ),
+            )
+        ],
+        created=1725036547,
+        model=model,
+        object="chat.completion",
+        system_fingerprint=None,
+        usage=Usage(
+            completion_tokens=10,
+            prompt_tokens=0,
+            total_tokens=10,
+            prompt_tokens_details=PromptTokensDetails(
+                cached_tokens=14,
+            ),
+        ),
+    )
+
+    cost_1 = completion_cost(model=model, completion_response=response_1)
+    cost_2 = completion_cost(model=model, completion_response=response_2)
+    assert cost_1 > cost_2
+
+    model_info = get_model_info(model=model, custom_llm_provider="openai")
+    usage = response_2.usage
+
+    _expected_cost2 = (
+        usage.prompt_tokens * model_info["input_cost_per_token"]
+        + usage.completion_tokens * model_info["output_cost_per_token"]
+        + usage.prompt_tokens_details.cached_tokens
+        * model_info["cache_read_input_token_cost"]
+    )
+
+    print("_expected_cost2", _expected_cost2)
+    print("cost_2", cost_2)
+
+    assert cost_2 == _expected_cost2
+
+
 @pytest.mark.parametrize(
     "model",
     [