diff --git a/litellm/__init__.py b/litellm/__init__.py
index a59484b035..8cdde24a6a 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -950,6 +950,12 @@ openaiOSeriesConfig = OpenAIOSeriesConfig()
 from .llms.openai.chat.gpt_transformation import (
     OpenAIGPTConfig,
 )
+from .llms.openai.transcriptions.whisper_transformation import (
+    OpenAIWhisperAudioTranscriptionConfig,
+)
+from .llms.openai.transcriptions.gpt_transformation import (
+    OpenAIGPTAudioTranscriptionConfig,
+)
 openAIGPTConfig = OpenAIGPTConfig()
 from .llms.openai.chat.gpt_audio_transformation import (
diff --git a/litellm/litellm_core_utils/get_supported_openai_params.py b/litellm/litellm_core_utils/get_supported_openai_params.py
index 3d4f8cef6f..ccbdb331fd 100644
--- a/litellm/litellm_core_utils/get_supported_openai_params.py
+++ b/litellm/litellm_core_utils/get_supported_openai_params.py
@@ -79,6 +79,22 @@ def get_supported_openai_params(  # noqa: PLR0915
     elif custom_llm_provider == "maritalk":
         return litellm.MaritalkConfig().get_supported_openai_params(model=model)
     elif custom_llm_provider == "openai":
+        if request_type == "transcription":
+            transcription_provider_config = (
+                litellm.ProviderConfigManager.get_provider_audio_transcription_config(
+                    model=model, provider=LlmProviders.OPENAI
+                )
+            )
+            if isinstance(
+                transcription_provider_config, litellm.OpenAIGPTAudioTranscriptionConfig
+            ):
+                return transcription_provider_config.get_supported_openai_params(
+                    model=model
+                )
+            else:
+                raise ValueError(
+                    f"Unsupported provider config: {transcription_provider_config} for model: {model}"
+                )
         return litellm.OpenAIConfig().get_supported_openai_params(model=model)
     elif custom_llm_provider == "azure":
         if litellm.AzureOpenAIO1Config().is_o_series_model(model=model):
diff --git a/litellm/llms/base_llm/audio_transcription/transformation.py b/litellm/llms/base_llm/audio_transcription/transformation.py
index e550c574e2..d48edb041c 100644
--- a/litellm/llms/base_llm/audio_transcription/transformation.py
+++ b/litellm/llms/base_llm/audio_transcription/transformation.py
@@ -1,5 +1,5 @@
 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, Any, List, Optional
+from typing import TYPE_CHECKING, Any, List, Optional, Union
 
 import httpx
 
@@ -8,7 +8,7 @@ from litellm.types.llms.openai import (
     AllMessageValues,
     OpenAIAudioTranscriptionOptionalParams,
 )
-from litellm.types.utils import ModelResponse
+from litellm.types.utils import FileTypes, ModelResponse
 
 if TYPE_CHECKING:
     from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj
@@ -42,6 +42,18 @@ class BaseAudioTranscriptionConfig(BaseConfig, ABC):
         """
         return api_base or ""
 
+    @abstractmethod
+    def transform_audio_transcription_request(
+        self,
+        model: str,
+        audio_file: FileTypes,
+        optional_params: dict,
+        litellm_params: dict,
+    ) -> Union[dict, bytes]:
+        raise NotImplementedError(
+            "AudioTranscriptionConfig needs a request transformation for audio transcription models"
+        )
+
     def transform_request(
         self,
         model: str,
diff --git a/litellm/llms/custom_httpx/llm_http_handler.py b/litellm/llms/custom_httpx/llm_http_handler.py
index 00caf55207..872626c747 100644
--- a/litellm/llms/custom_httpx/llm_http_handler.py
+++ b/litellm/llms/custom_httpx/llm_http_handler.py
@@ -1,4 +1,3 @@
-import io
 import json
 from typing import TYPE_CHECKING, Any, Coroutine, Dict, Optional, Tuple, Union
 
@@ -8,6 +7,9 @@ import litellm
 import litellm.litellm_core_utils
 import litellm.types
 import litellm.types.utils
+from litellm.llms.base_llm.audio_transcription.transformation import (
+    BaseAudioTranscriptionConfig,
+)
 from litellm.llms.base_llm.chat.transformation import BaseConfig
 from litellm.llms.base_llm.embedding.transformation import BaseEmbeddingConfig
 from litellm.llms.base_llm.rerank.transformation import BaseRerankConfig
@@ -852,54 +854,12 @@ class BaseLLMHTTPHandler:
             request_data=request_data,
         )
 
-    def handle_audio_file(self, audio_file: FileTypes) -> bytes:
-        """
-        Processes the audio file input based on its type and returns the binary data.
-
-        Args:
-            audio_file: Can be a file path (str), a tuple (filename, file_content), or binary data (bytes).
-
-        Returns:
-            The binary data of the audio file.
-        """
-        binary_data: bytes  # Explicitly declare the type
-
-        # Handle the audio file based on type
-        if isinstance(audio_file, str):
-            # If it's a file path
-            with open(audio_file, "rb") as f:
-                binary_data = f.read()  # `f.read()` always returns `bytes`
-        elif isinstance(audio_file, tuple):
-            # Handle tuple case
-            _, file_content = audio_file[:2]
-            if isinstance(file_content, str):
-                with open(file_content, "rb") as f:
-                    binary_data = f.read()  # `f.read()` always returns `bytes`
-            elif isinstance(file_content, bytes):
-                binary_data = file_content
-            else:
-                raise TypeError(
-                    f"Unexpected type in tuple: {type(file_content)}. Expected str or bytes."
-                )
-        elif isinstance(audio_file, bytes):
-            # Assume it's already binary data
-            binary_data = audio_file
-        elif isinstance(audio_file, io.BufferedReader) or isinstance(
-            audio_file, io.BytesIO
-        ):
-            # Handle file-like objects
-            binary_data = audio_file.read()
-
-        else:
-            raise TypeError(f"Unsupported type for audio_file: {type(audio_file)}")
-
-        return binary_data
-
     def audio_transcriptions(
         self,
         model: str,
         audio_file: FileTypes,
         optional_params: dict,
+        litellm_params: dict,
         model_response: TranscriptionResponse,
         timeout: float,
         max_retries: int,
@@ -910,11 +870,8 @@ class BaseLLMHTTPHandler:
         client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
         atranscription: bool = False,
         headers: dict = {},
-        litellm_params: dict = {},
+        provider_config: Optional[BaseAudioTranscriptionConfig] = None,
    ) -> TranscriptionResponse:
-        provider_config = ProviderConfigManager.get_provider_audio_transcription_config(
-            model=model, provider=litellm.LlmProviders(custom_llm_provider)
-        )
         if provider_config is None:
             raise ValueError(
                 f"No provider config found for model: {model} and provider: {custom_llm_provider}"
@@ -938,7 +895,18 @@ class BaseLLMHTTPHandler:
         )
 
         # Handle the audio file based on type
-        binary_data = self.handle_audio_file(audio_file)
+        data = provider_config.transform_audio_transcription_request(
+            model=model,
+            audio_file=audio_file,
+            optional_params=optional_params,
+            litellm_params=litellm_params,
+        )
+        binary_data: Optional[bytes] = None
+        json_data: Optional[dict] = None
+        if isinstance(data, bytes):
+            binary_data = data
+        else:
+            json_data = data
 
         try:
             # Make the POST request
@@ -946,6 +914,7 @@ class BaseLLMHTTPHandler:
                 url=complete_url,
                 headers=headers,
                 content=binary_data,
+                json=json_data,
                 timeout=timeout,
             )
         except Exception as e:
diff --git a/litellm/llms/deepgram/audio_transcription/transformation.py b/litellm/llms/deepgram/audio_transcription/transformation.py
index 06296736ea..90720a77f7 100644
--- a/litellm/llms/deepgram/audio_transcription/transformation.py
+++ b/litellm/llms/deepgram/audio_transcription/transformation.py
@@ -2,6 +2,7 @@
 Translates from OpenAI's `/v1/audio/transcriptions` to Deepgram's `/v1/listen`
 """
 
+import io
 from typing import List, Optional, Union
 
 from httpx import Headers, Response
@@ -12,7 +13,7 @@ from litellm.types.llms.openai import (
     AllMessageValues,
     OpenAIAudioTranscriptionOptionalParams,
 )
-from litellm.types.utils import TranscriptionResponse
+from litellm.types.utils import FileTypes, TranscriptionResponse
 
 from ...base_llm.audio_transcription.transformation import (
     BaseAudioTranscriptionConfig,
@@ -47,6 +48,55 @@ class DeepgramAudioTranscriptionConfig(BaseAudioTranscriptionConfig):
             message=error_message, status_code=status_code, headers=headers
         )
 
+    def transform_audio_transcription_request(
+        self,
+        model: str,
+        audio_file: FileTypes,
+        optional_params: dict,
+        litellm_params: dict,
+    ) -> Union[dict, bytes]:
+        """
+        Processes the audio file input based on its type and returns the binary data.
+
+        Args:
+            audio_file: Can be a file path (str), a tuple (filename, file_content), or binary data (bytes).
+
+        Returns:
+            The binary data of the audio file.
+        """
+        binary_data: bytes  # Explicitly declare the type
+
+        # Handle the audio file based on type
+        if isinstance(audio_file, str):
+            # If it's a file path
+            with open(audio_file, "rb") as f:
+                binary_data = f.read()  # `f.read()` always returns `bytes`
+        elif isinstance(audio_file, tuple):
+            # Handle tuple case
+            _, file_content = audio_file[:2]
+            if isinstance(file_content, str):
+                with open(file_content, "rb") as f:
+                    binary_data = f.read()  # `f.read()` always returns `bytes`
+            elif isinstance(file_content, bytes):
+                binary_data = file_content
+            else:
+                raise TypeError(
+                    f"Unexpected type in tuple: {type(file_content)}. Expected str or bytes."
+                )
+        elif isinstance(audio_file, bytes):
+            # Assume it's already binary data
+            binary_data = audio_file
+        elif isinstance(audio_file, io.BufferedReader) or isinstance(
+            audio_file, io.BytesIO
+        ):
+            # Handle file-like objects
+            binary_data = audio_file.read()
+
+        else:
+            raise TypeError(f"Unsupported type for audio_file: {type(audio_file)}")
+
+        return binary_data
+
     def transform_audio_transcription_response(
         self,
         model: str,
diff --git a/litellm/llms/fireworks_ai/audio_transcription/transformation.py b/litellm/llms/fireworks_ai/audio_transcription/transformation.py
index 8f35705299..00bb5f2679 100644
--- a/litellm/llms/fireworks_ai/audio_transcription/transformation.py
+++ b/litellm/llms/fireworks_ai/audio_transcription/transformation.py
@@ -2,27 +2,16 @@ from typing import List
 
 from litellm.types.llms.openai import OpenAIAudioTranscriptionOptionalParams
 
-from ...base_llm.audio_transcription.transformation import BaseAudioTranscriptionConfig
+from ...openai.transcriptions.whisper_transformation import (
+    OpenAIWhisperAudioTranscriptionConfig,
+)
 from ..common_utils import FireworksAIMixin
 
 
 class FireworksAIAudioTranscriptionConfig(
-    FireworksAIMixin, BaseAudioTranscriptionConfig
+    FireworksAIMixin, OpenAIWhisperAudioTranscriptionConfig
 ):
     def get_supported_openai_params(
         self, model: str
     ) -> List[OpenAIAudioTranscriptionOptionalParams]:
         return ["language", "prompt", "response_format", "timestamp_granularities"]
-
-    def map_openai_params(
-        self,
-        non_default_params: dict,
-        optional_params: dict,
-        model: str,
-        drop_params: bool,
-    ) -> dict:
-        supported_params = self.get_supported_openai_params(model)
-        for k, v in non_default_params.items():
-            if k in supported_params:
-                optional_params[k] = v
-        return optional_params
diff --git a/litellm/llms/openai/transcriptions/gpt_transformation.py b/litellm/llms/openai/transcriptions/gpt_transformation.py
new file mode 100644
index 0000000000..796e10f515
--- /dev/null
+++ b/litellm/llms/openai/transcriptions/gpt_transformation.py
@@ -0,0 +1,34 @@
+from typing import List
+
+from litellm.types.llms.openai import OpenAIAudioTranscriptionOptionalParams
+from litellm.types.utils import FileTypes
+
+from .whisper_transformation import OpenAIWhisperAudioTranscriptionConfig
+
+
+class OpenAIGPTAudioTranscriptionConfig(OpenAIWhisperAudioTranscriptionConfig):
+    def get_supported_openai_params(
+        self, model: str
+    ) -> List[OpenAIAudioTranscriptionOptionalParams]:
+        """
+        Get the supported OpenAI params for the `gpt-4o-transcribe` models
+        """
+        return [
+            "language",
+            "prompt",
+            "response_format",
+            "temperature",
+            "include",
+        ]
+
+    def transform_audio_transcription_request(
+        self,
+        model: str,
+        audio_file: FileTypes,
+        optional_params: dict,
+        litellm_params: dict,
+    ) -> dict:
+        """
+        Transform the audio transcription request
+        """
+        return {"model": model, "file": audio_file, **optional_params}
diff --git a/litellm/llms/openai/transcriptions/handler.py b/litellm/llms/openai/transcriptions/handler.py
index d9dd3c123b..78a913cbf3 100644
--- a/litellm/llms/openai/transcriptions/handler.py
+++ b/litellm/llms/openai/transcriptions/handler.py
@@ -7,6 +7,9 @@ from pydantic import BaseModel
 import litellm
 from litellm.litellm_core_utils.audio_utils.utils import get_audio_file_name
 from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
+from litellm.llms.base_llm.audio_transcription.transformation import (
+    BaseAudioTranscriptionConfig,
+)
 from litellm.types.utils import FileTypes
 from litellm.utils import (
     TranscriptionResponse,
@@ -75,6 +78,7 @@ class OpenAIAudioTranscription(OpenAIChatCompletion):
         model: str,
         audio_file: FileTypes,
         optional_params: dict,
+        litellm_params: dict,
         model_response: TranscriptionResponse,
         timeout: float,
         max_retries: int,
@@ -83,16 +87,24 @@ class OpenAIAudioTranscription(OpenAIChatCompletion):
         api_base: Optional[str],
         client=None,
         atranscription: bool = False,
+        provider_config: Optional[BaseAudioTranscriptionConfig] = None,
     ) -> TranscriptionResponse:
-        data = {"model": model, "file": audio_file, **optional_params}
-
-        if "response_format" not in data or (
-            data["response_format"] == "text" or data["response_format"] == "json"
-        ):
-            data["response_format"] = (
-                "verbose_json"  # ensures 'duration' is received - used for cost calculation
+        """
+        Handle audio transcription request
+        """
+        if provider_config is not None:
+            data = provider_config.transform_audio_transcription_request(
+                model=model,
+                audio_file=audio_file,
+                optional_params=optional_params,
+                litellm_params=litellm_params,
             )
+            if isinstance(data, bytes):
+                raise ValueError("OpenAI transformation route requires a dict")
+        else:
+            data = {"model": model, "file": audio_file, **optional_params}
+
         if atranscription is True:
             return self.async_audio_transcriptions(  # type: ignore
                 audio_file=audio_file,
diff --git a/litellm/llms/openai/transcriptions/whisper_transformation.py b/litellm/llms/openai/transcriptions/whisper_transformation.py
new file mode 100644
index 0000000000..5a7d6481a8
--- /dev/null
+++ b/litellm/llms/openai/transcriptions/whisper_transformation.py
@@ -0,0 +1,97 @@
+from typing import List, Optional, Union
+
+from httpx import Headers
+
+from litellm.llms.base_llm.audio_transcription.transformation import (
+    BaseAudioTranscriptionConfig,
+)
+from litellm.llms.base_llm.chat.transformation import BaseLLMException
+from litellm.secret_managers.main import get_secret_str
+from litellm.types.llms.openai import (
+    AllMessageValues,
+    OpenAIAudioTranscriptionOptionalParams,
+)
+from litellm.types.utils import FileTypes
+
+from ..common_utils import OpenAIError
+
+
+class OpenAIWhisperAudioTranscriptionConfig(BaseAudioTranscriptionConfig):
+    def get_supported_openai_params(
+        self, model: str
+    ) -> List[OpenAIAudioTranscriptionOptionalParams]:
+        """
+        Get the supported OpenAI params for the `whisper-1` models
+        """
+        return [
+            "language",
+            "prompt",
+            "response_format",
+            "temperature",
+            "timestamp_granularities",
+        ]
+
+    def map_openai_params(
+        self,
+        non_default_params: dict,
+        optional_params: dict,
+        model: str,
+        drop_params: bool,
+    ) -> dict:
+        """
+        Map the OpenAI params to the Whisper params
+        """
+        supported_params = self.get_supported_openai_params(model)
+        for k, v in non_default_params.items():
+            if k in supported_params:
+                optional_params[k] = v
+        return optional_params
+
+    def validate_environment(
+        self,
+        headers: dict,
+        model: str,
+        messages: List[AllMessageValues],
+        optional_params: dict,
+        api_key: Optional[str] = None,
+        api_base: Optional[str] = None,
+    ) -> dict:
+        api_key = api_key or get_secret_str("OPENAI_API_KEY")
+
+        auth_header = {
+            "Authorization": f"Bearer {api_key}",
+        }
+
+        headers.update(auth_header)
+        return headers
+
+    def transform_audio_transcription_request(
+        self,
+        model: str,
+        audio_file: FileTypes,
+        optional_params: dict,
+        litellm_params: dict,
+    ) -> dict:
+        """
+        Transform the audio transcription request
+        """
+
+        data = {"model": model, "file": audio_file, **optional_params}
+
+        if "response_format" not in data or (
+            data["response_format"] == "text" or data["response_format"] == "json"
+        ):
+            data["response_format"] = (
+                "verbose_json"  # ensures 'duration' is received - used for cost calculation
+            )
+
+        return data
+
+    def get_error_class(
+        self, error_message: str, status_code: int, headers: Union[dict, Headers]
+    ) -> BaseLLMException:
+        return OpenAIError(
+            status_code=status_code,
+            message=error_message,
+            headers=headers,
+        )
diff --git a/litellm/main.py b/litellm/main.py
index 94e19aab0c..b2732c165c 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -5095,6 +5095,12 @@ def transcription(
     response: Optional[
         Union[TranscriptionResponse, Coroutine[Any, Any, TranscriptionResponse]]
     ] = None
+
+    provider_config = ProviderConfigManager.get_provider_audio_transcription_config(
+        model=model,
+        provider=LlmProviders(custom_llm_provider),
+    )
+
     if custom_llm_provider == "azure":
         # azure configs
         api_base = api_base or litellm.api_base or get_secret_str("AZURE_API_BASE")
@@ -5161,12 +5167,15 @@ def transcription(
             max_retries=max_retries,
             api_base=api_base,
             api_key=api_key,
+            provider_config=provider_config,
+            litellm_params=litellm_params_dict,
         )
     elif custom_llm_provider == "deepgram":
         response = base_llm_http_handler.audio_transcriptions(
             model=model,
             audio_file=file,
             optional_params=optional_params,
+            litellm_params=litellm_params_dict,
             model_response=model_response,
             atranscription=atranscription,
             client=(
@@ -5185,6 +5194,7 @@ def transcription(
             api_key=api_key,
             custom_llm_provider="deepgram",
             headers={},
+            provider_config=provider_config,
         )
     if response is None:
         raise ValueError("Unmapped provider passed in. Unable to get the response.")
diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index b8e32a24ce..da780f8353 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -1176,21 +1176,40 @@
         "output_cost_per_pixel": 0.0,
         "litellm_provider": "openai"
     },
+    "gpt-4o-transcribe": {
+        "mode": "audio_transcription",
+        "input_cost_per_token": 0.0000025,
+        "input_cost_per_audio_token": 0.000006,
+        "output_cost_per_token": 0.00001,
+        "litellm_provider": "openai",
+        "supported_endpoints": ["/v1/audio/transcriptions"]
+    },
+    "gpt-4o-mini-transcribe": {
+        "mode": "audio_transcription",
+        "input_cost_per_token": 0.00000125,
+        "input_cost_per_audio_token": 0.000003,
+        "output_cost_per_token": 0.000005,
+        "litellm_provider": "openai",
+        "supported_endpoints": ["/v1/audio/transcriptions"]
+    },
     "whisper-1": {
         "mode": "audio_transcription",
         "input_cost_per_second": 0.0001,
         "output_cost_per_second": 0.0001,
-        "litellm_provider": "openai"
+        "litellm_provider": "openai",
+        "supported_endpoints": ["/v1/audio/transcriptions"]
     },
     "tts-1": {
         "mode": "audio_speech",
         "input_cost_per_character": 0.000015,
-        "litellm_provider": "openai"
+        "litellm_provider": "openai",
+        "supported_endpoints": ["/v1/audio/speech"]
     },
     "tts-1-hd": {
         "mode": "audio_speech",
         "input_cost_per_character": 0.000030,
-        "litellm_provider": "openai"
+        "litellm_provider": "openai",
+        "supported_endpoints": ["/v1/audio/speech"]
     },
     "azure/gpt-4o-mini-realtime-preview-2024-12-17": {
         "max_tokens": 4096,
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index 09db9f10ee..b9a5765ee4 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -9,6 +9,10 @@ model_list:
     litellm_params:
       model: gpt-4o-mini
       api_key: os.environ/OPENAI_API_KEY
+  - model_name: "openai/*"
+    litellm_params:
+      model: openai/*
+      api_key: os.environ/OPENAI_API_KEY
   - model_name: "bedrock-nova"
     litellm_params:
       model: us.amazon.nova-pro-v1:0
diff --git a/litellm/types/llms/openai.py b/litellm/types/llms/openai.py
index 19899648f5..1c5552637c 100644
--- a/litellm/types/llms/openai.py
+++ b/litellm/types/llms/openai.py
@@ -779,7 +779,12 @@ class LiteLLMFineTuningJobCreate(FineTuningJobCreate):
 AllEmbeddingInputValues = Union[str, List[str], List[int], List[List[int]]]
 
 OpenAIAudioTranscriptionOptionalParams = Literal[
-    "language", "prompt", "temperature", "response_format", "timestamp_granularities"
+    "language",
+    "prompt",
+    "temperature",
+    "response_format",
+    "timestamp_granularities",
+    "include",
 ]
diff --git a/litellm/utils.py b/litellm/utils.py
index dc97c4d898..3fcb4a803a 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -6364,6 +6364,11 @@ class ProviderConfigManager:
             return litellm.FireworksAIAudioTranscriptionConfig()
         elif litellm.LlmProviders.DEEPGRAM == provider:
             return litellm.DeepgramAudioTranscriptionConfig()
+        elif litellm.LlmProviders.OPENAI == provider:
+            if "gpt-4o" in model:
+                return litellm.OpenAIGPTAudioTranscriptionConfig()
+            else:
+                return litellm.OpenAIWhisperAudioTranscriptionConfig()
         return None
 
     @staticmethod
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index b8e32a24ce..da780f8353 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -1176,21 +1176,40 @@
         "output_cost_per_pixel": 0.0,
         "litellm_provider": "openai"
     },
+    "gpt-4o-transcribe": {
+        "mode": "audio_transcription",
+        "input_cost_per_token": 0.0000025,
+        "input_cost_per_audio_token": 0.000006,
+        "output_cost_per_token": 0.00001,
+        "litellm_provider": "openai",
+        "supported_endpoints": ["/v1/audio/transcriptions"]
+    },
+    "gpt-4o-mini-transcribe": {
+        "mode": "audio_transcription",
+        "input_cost_per_token": 0.00000125,
+        "input_cost_per_audio_token": 0.000003,
+        "output_cost_per_token": 0.000005,
+        "litellm_provider": "openai",
+        "supported_endpoints": ["/v1/audio/transcriptions"]
+    },
     "whisper-1": {
         "mode": "audio_transcription",
         "input_cost_per_second": 0.0001,
         "output_cost_per_second": 0.0001,
-        "litellm_provider": "openai"
+        "litellm_provider": "openai",
+        "supported_endpoints": ["/v1/audio/transcriptions"]
     },
     "tts-1": {
         "mode": "audio_speech",
         "input_cost_per_character": 0.000015,
-        "litellm_provider": "openai"
+        "litellm_provider": "openai",
+        "supported_endpoints": ["/v1/audio/speech"]
     },
     "tts-1-hd": {
         "mode": "audio_speech",
         "input_cost_per_character": 0.000030,
-        "litellm_provider": "openai"
+        "litellm_provider": "openai",
+        "supported_endpoints": ["/v1/audio/speech"]
     },
     "azure/gpt-4o-mini-realtime-preview-2024-12-17": {
         "max_tokens": 4096,
diff --git a/tests/litellm/llms/custom_httpx/test_handle_audio_file.py b/tests/litellm/llms/deepgram/audio_transcription/test_deepseek_audio_transcription_transformation.py
similarity index 60%
rename from tests/litellm/llms/custom_httpx/test_handle_audio_file.py
rename to tests/litellm/llms/deepgram/audio_transcription/test_deepseek_audio_transcription_transformation.py
index 682803788b..ea035db119 100644
--- a/tests/litellm/llms/custom_httpx/test_handle_audio_file.py
+++ b/tests/litellm/llms/deepgram/audio_transcription/test_deepseek_audio_transcription_transformation.py
@@ -1,47 +1,57 @@
-
-import os
 import io
+import os
 import pathlib
 import sys
 
 import pytest
 
-
 sys.path.insert(
-    0, os.path.abspath("../../../..")
+    0, os.path.abspath("../../../../..")
 )  # Adds the parent directory to the system path
-import litellm
-from litellm.llms.custom_httpx.llm_http_handler import BaseLLMHTTPHandler
+
+import litellm
+from litellm.llms.deepgram.audio_transcription.transformation import (
+    DeepgramAudioTranscriptionConfig,
+)
+
 
 @pytest.fixture
 def test_bytes():
-    return b'litellm', b'litellm'
+    return b"litellm", b"litellm"
+
 
 @pytest.fixture
 def test_io_bytes(test_bytes):
     return io.BytesIO(test_bytes[0]), test_bytes[1]
 
+
 @pytest.fixture
 def test_file():
     pwd = os.path.dirname(os.path.realpath(__file__))
     pwd_path = pathlib.Path(pwd)
-    test_root = pwd_path.parents[2]
+    test_root = pwd_path.parents[3]
+    print(f"test_root: {test_root}")
     file_path = os.path.join(test_root, "gettysburg.wav")
     f = open(file_path, "rb")
     content = f.read()
     f.seek(0)
     return f, content
 
+
 @pytest.mark.parametrize(
     "fixture_name",
     [
         "test_bytes",
         "test_io_bytes",
         "test_file",
-    ]
+    ],
 )
 def test_audio_file_handling(fixture_name, request):
-    handler = BaseLLMHTTPHandler()
+    handler = DeepgramAudioTranscriptionConfig()
     (audio_file, expected_output) = request.getfixturevalue(fixture_name)
-    assert expected_output == handler.handle_audio_file(audio_file)
\ No newline at end of file
+    assert expected_output == handler.transform_audio_transcription_request(
+        model="deepseek-audio-transcription",
+        audio_file=audio_file,
+        optional_params={},
+        litellm_params={},
+    )
diff --git a/tests/litellm_utils_tests/test_utils.py b/tests/litellm_utils_tests/test_utils.py
index fea225e4a3..535861ce1a 100644
--- a/tests/litellm_utils_tests/test_utils.py
+++ b/tests/litellm_utils_tests/test_utils.py
@@ -2074,3 +2074,13 @@ def test_delta_object():
     assert delta.role == "user"
     assert not hasattr(delta, "thinking_blocks")
     assert not hasattr(delta, "reasoning_content")
+
+
+def test_get_provider_audio_transcription_config():
+    from litellm.utils import ProviderConfigManager
+    from litellm.types.utils import LlmProviders
+
+    for provider in LlmProviders:
+        config = ProviderConfigManager.get_provider_audio_transcription_config(
+            model="whisper-1", provider=provider
+        )
diff --git a/tests/llm_translation/test_openai.py b/tests/llm_translation/test_openai.py
index 633ff76467..676019b86f 100644
--- a/tests/llm_translation/test_openai.py
+++ b/tests/llm_translation/test_openai.py
@@ -22,6 +22,7 @@ from litellm.types.llms.openai import (
     ChatCompletionAnnotation,
     ChatCompletionAnnotationURLCitation,
 )
+from base_audio_transcription_unit_tests import BaseLLMAudioTranscriptionTest
 
 
 def test_openai_prediction_param():
@@ -458,3 +459,13 @@ def test_openai_web_search_streaming():
     # Assert this request has at-least one web search annotation
     assert test_openai_web_search is not None
     validate_web_search_annotations(test_openai_web_search)
+
+
+class TestOpenAIGPT4OAudioTranscription(BaseLLMAudioTranscriptionTest):
+    def get_base_audio_transcription_call_args(self) -> dict:
+        return {
+            "model": "openai/gpt-4o-transcribe",
+        }
+
+    def get_custom_llm_provider(self) -> litellm.LlmProviders:
+        return litellm.LlmProviders.OPENAI
diff --git a/tests/local_testing/test_get_model_info.py b/tests/local_testing/test_get_model_info.py
index 27b9b1a2b6..7677f90671 100644
--- a/tests/local_testing/test_get_model_info.py
+++ b/tests/local_testing/test_get_model_info.py
@@ -520,6 +520,8 @@ def test_aaamodel_prices_and_context_window_json_is_valid():
                     "/v1/images/variations",
                     "/v1/images/edits",
                     "/v1/batch",
+                    "/v1/audio/transcriptions",
+                    "/v1/audio/speech",
                 ],
             },
         },
diff --git a/tests/local_testing/test_whisper.py b/tests/local_testing/test_whisper.py
index 5bda5456a4..bca7ae2add 100644
--- a/tests/local_testing/test_whisper.py
+++ b/tests/local_testing/test_whisper.py
@@ -134,3 +134,33 @@ async def test_whisper_log_pre_call():
             file=audio_file,
         )
         mock_log_pre_call.assert_called_once()
+
+
+@pytest.mark.asyncio
+async def test_whisper_log_pre_call():
+    from litellm.litellm_core_utils.litellm_logging import Logging
+    from datetime import datetime
+    from unittest.mock import patch, MagicMock
+    from litellm.integrations.custom_logger import CustomLogger
+
+    custom_logger = CustomLogger()
+
+    litellm.callbacks = [custom_logger]
+
+    with patch.object(custom_logger, "log_pre_api_call") as mock_log_pre_call:
+        await litellm.atranscription(
+            model="whisper-1",
+            file=audio_file,
+        )
+        mock_log_pre_call.assert_called_once()
+
+
+@pytest.mark.asyncio
+async def test_gpt_4o_transcribe():
+    from litellm.litellm_core_utils.litellm_logging import Logging
+    from datetime import datetime
+    from unittest.mock import patch, MagicMock
+
+    await litellm.atranscription(
+        model="openai/gpt-4o-transcribe", file=audio_file, response_format="json"
+    )
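
With this change applied, the GPT-4o transcription models route through the same litellm.transcription / litellm.atranscription entry points as whisper-1, using the new OpenAIGPTAudioTranscriptionConfig. A minimal usage sketch, not part of the patch (the audio file path is illustrative and OPENAI_API_KEY is assumed to be set in the environment):

import litellm

# gpt-4o-transcribe passes response_format through unchanged; whisper-1 requests
# are upgraded to "verbose_json" by OpenAIWhisperAudioTranscriptionConfig so that
# 'duration' is returned for cost calculation.
with open("gettysburg.wav", "rb") as audio_file:
    response = litellm.transcription(
        model="openai/gpt-4o-transcribe",
        file=audio_file,
        response_format="json",
    )
    print(response.text)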