Merge pull request #5534 from BerriAI/litellm_fix_transcription

[Fix] transcription/atranscription file parameter should accept correct types
Author: Ishaan Jaff
Date: 2024-09-05 18:41:36 -07:00 (committed by GitHub)
Commit: 8dbf372c6f
25 changed files with 471 additions and 458 deletions

View file

@@ -924,7 +924,7 @@ from .llms.bedrock.embed.amazon_titan_v2_transformation import (
AmazonTitanV2Config,
)
from .llms.bedrock.embed.cohere_transformation import BedrockCohereEmbeddingConfig
from .llms.openai import (
from .llms.OpenAI.openai import (
OpenAIConfig,
OpenAITextCompletionConfig,
MistralConfig,
@@ -939,7 +939,7 @@ from .llms.AI21.chat import AI21ChatConfig
from .llms.fireworks_ai import FireworksAIConfig
from .llms.volcengine import VolcEngineConfig
from .llms.text_completion_codestral import MistralTextCompletionConfig
from .llms.azure import (
from .llms.AzureOpenAI.azure import (
AzureOpenAIConfig,
AzureOpenAIError,
AzureOpenAIAssistantsAPIConfig,

View file

@@ -21,8 +21,8 @@ from litellm.utils import (
supports_httpx_timeout,
)
from ..llms.azure import AzureAssistantsAPI
from ..llms.openai import OpenAIAssistantsAPI
from ..llms.AzureOpenAI.azure import AzureAssistantsAPI
from ..llms.OpenAI.openai import OpenAIAssistantsAPI
from ..types.llms.openai import *
from ..types.router import *
from .utils import get_optional_params_add_message
@@ -184,6 +184,21 @@ def get_assistants(
request=httpx.Request(method="create_thread", url="https://github.com/BerriAI/litellm"), # type: ignore
),
)
if response is None:
raise litellm.exceptions.BadRequestError(
message="LiteLLM doesn't support {} for 'get_assistants'. Only 'openai' is supported.".format(
custom_llm_provider
),
model="n/a",
llm_provider=custom_llm_provider,
response=httpx.Response(
status_code=400,
content="Unsupported provider",
request=httpx.Request(method="create_thread", url="https://github.com/BerriAI/litellm"), # type: ignore
),
)
return response

View file

@@ -20,8 +20,8 @@ import httpx
import litellm
from litellm import client
from litellm.llms.azure import AzureBatchesAPI
from litellm.llms.openai import OpenAIBatchesAPI
from litellm.llms.AzureOpenAI.azure import AzureBatchesAPI
from litellm.llms.OpenAI.openai import OpenAIBatchesAPI
from litellm.secret_managers.main import get_secret
from litellm.types.llms.openai import (
Batch,

View file

@@ -17,7 +17,7 @@ import time
import traceback
from datetime import timedelta
from enum import Enum
from typing import Any, BinaryIO, List, Literal, Optional, Union
from typing import Any, List, Literal, Optional, Union
from openai._models import BaseModel as OpenAIObject

View file

@@ -16,7 +16,7 @@ import httpx
import litellm
from litellm import client, get_secret
from litellm.llms.files_apis.azure import AzureOpenAIFilesAPI
from litellm.llms.openai import FileDeleted, FileObject, OpenAIFilesAPI
from litellm.llms.OpenAI.openai import FileDeleted, FileObject, OpenAIFilesAPI
from litellm.types.llms.openai import (
Batch,
CreateFileRequest,

View file

@@ -0,0 +1,23 @@
"""
Utils used for litellm.transcription() and litellm.atranscription()
"""
from litellm.types.utils import FileTypes
def get_audio_file_name(file_obj: FileTypes) -> str:
"""
Safely get the name of a file-like object or return its string representation.
Args:
file_obj (Any): A file-like object or any other object.
Returns:
str: The name of the file if available, otherwise a string representation of the object.
"""
if hasattr(file_obj, "name"):
return getattr(file_obj, "name")
elif hasattr(file_obj, "__str__"):
return str(file_obj)
else:
return repr(file_obj)
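
For reference, a quick sketch of how the new helper resolves the different `FileTypes` inputs (the file names below are hypothetical):

```
from litellm.litellm_core_utils.audio_utils.utils import get_audio_file_name

# file-like object: exposes .name, so the name is returned
with open("speech.wav", "rb") as f:                  # hypothetical file
    print(get_audio_file_name(f))                    # -> "speech.wav"

# plain path string: no .name attribute, falls back to str()
print(get_audio_file_name("speech.wav"))             # -> "speech.wav"

# (filename, bytes) tuple: also falls back to str()
print(get_audio_file_name(("speech.wav", b"RIFF")))  # -> "('speech.wav', b'RIFF')"
```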

View file

@@ -1,7 +1,7 @@
# What is this?
## Helper utilities
import os
from typing import BinaryIO, List, Literal, Optional, Tuple
from typing import List, Literal, Optional, Tuple
from litellm._logging import verbose_logger
@@ -86,20 +86,3 @@ def _get_parent_otel_span_from_kwargs(kwargs: Optional[dict] = None):
return kwargs["litellm_parent_otel_span"]
except:
return None
def get_file_check_sum(_file: BinaryIO):
"""
Helper to safely get file checksum - used as a cache key
"""
try:
file_descriptor = _file.fileno()
file_stat = os.fstat(file_descriptor)
file_size = str(file_stat.st_size)
file_checksum = _file.name + file_size
return file_checksum
except Exception as e:
verbose_logger.error(f"Error getting file_checksum: {(str(e))}")
file_checksum = _file.name
return file_checksum
return file_checksum

View file

@@ -0,0 +1,192 @@
import uuid
from typing import Optional, Union
import httpx
from openai import AsyncAzureOpenAI, AzureOpenAI
from pydantic import BaseModel
import litellm
from litellm.litellm_core_utils.audio_utils.utils import get_audio_file_name
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.types.utils import FileTypes
from litellm.utils import TranscriptionResponse, convert_to_model_response_object
from .azure import (
AzureChatCompletion,
get_azure_ad_token_from_oidc,
select_azure_base_url_or_endpoint,
)
class AzureAudioTranscription(AzureChatCompletion):
def audio_transcriptions(
self,
model: str,
audio_file: FileTypes,
optional_params: dict,
model_response: TranscriptionResponse,
timeout: float,
max_retries: int,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
api_version: Optional[str] = None,
client=None,
azure_ad_token: Optional[str] = None,
logging_obj=None,
atranscription: bool = False,
):
data = {"model": model, "file": audio_file, **optional_params}
# init AzureOpenAI Client
azure_client_params = {
"api_version": api_version,
"azure_endpoint": api_base,
"azure_deployment": model,
"timeout": timeout,
}
azure_client_params = select_azure_base_url_or_endpoint(
azure_client_params=azure_client_params
)
if api_key is not None:
azure_client_params["api_key"] = api_key
elif azure_ad_token is not None:
if azure_ad_token.startswith("oidc/"):
azure_ad_token = get_azure_ad_token_from_oidc(azure_ad_token)
azure_client_params["azure_ad_token"] = azure_ad_token
if max_retries is not None:
azure_client_params["max_retries"] = max_retries
if atranscription is True:
return self.async_audio_transcriptions(
audio_file=audio_file,
data=data,
model_response=model_response,
timeout=timeout,
api_key=api_key,
api_base=api_base,
client=client,
azure_client_params=azure_client_params,
max_retries=max_retries,
logging_obj=logging_obj,
)
if client is None:
azure_client = AzureOpenAI(http_client=litellm.client_session, **azure_client_params) # type: ignore
else:
azure_client = client
## LOGGING
logging_obj.pre_call(
input=f"audio_file_{uuid.uuid4()}",
api_key=azure_client.api_key,
additional_args={
"headers": {"Authorization": f"Bearer {azure_client.api_key}"},
"api_base": azure_client._base_url._uri_reference,
"atranscription": True,
"complete_input_dict": data,
},
)
response = azure_client.audio.transcriptions.create(
**data, timeout=timeout # type: ignore
)
if isinstance(response, BaseModel):
stringified_response = response.model_dump()
else:
stringified_response = TranscriptionResponse(text=response).model_dump()
## LOGGING
logging_obj.post_call(
input=get_audio_file_name(audio_file),
api_key=api_key,
additional_args={"complete_input_dict": data},
original_response=stringified_response,
)
hidden_params = {"model": "whisper-1", "custom_llm_provider": "azure"}
final_response = convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, hidden_params=hidden_params, response_type="audio_transcription") # type: ignore
return final_response
async def async_audio_transcriptions(
self,
audio_file: FileTypes,
data: dict,
model_response: TranscriptionResponse,
timeout: float,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
client=None,
azure_client_params=None,
max_retries=None,
logging_obj=None,
):
response = None
try:
if client is None:
async_azure_client = AsyncAzureOpenAI(
**azure_client_params,
http_client=litellm.aclient_session,
)
else:
async_azure_client = client
## LOGGING
logging_obj.pre_call(
input=f"audio_file_{uuid.uuid4()}",
api_key=async_azure_client.api_key,
additional_args={
"headers": {
"Authorization": f"Bearer {async_azure_client.api_key}"
},
"api_base": async_azure_client._base_url._uri_reference,
"atranscription": True,
"complete_input_dict": data,
},
)
raw_response = (
await async_azure_client.audio.transcriptions.with_raw_response.create(
**data, timeout=timeout
)
) # type: ignore
headers = dict(raw_response.headers)
response = raw_response.parse()
if isinstance(response, BaseModel):
stringified_response = response.model_dump()
else:
stringified_response = TranscriptionResponse(text=response).model_dump()
## LOGGING
logging_obj.post_call(
input=get_audio_file_name(audio_file),
api_key=api_key,
additional_args={
"headers": {
"Authorization": f"Bearer {async_azure_client.api_key}"
},
"api_base": async_azure_client._base_url._uri_reference,
"atranscription": True,
"complete_input_dict": data,
},
original_response=stringified_response,
)
hidden_params = {"model": "whisper-1", "custom_llm_provider": "azure"}
response = convert_to_model_response_object(
_response_headers=headers,
response_object=stringified_response,
model_response_object=model_response,
hidden_params=hidden_params,
response_type="audio_transcription",
) # type: ignore
return response
except Exception as e:
## LOGGING
logging_obj.post_call(
input=input,
api_key=api_key,
original_response=str(e),
)
raise e
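
With the Azure path split out into `AzureAudioTranscription`, Azure transcriptions are routed through this module. A minimal caller-side sketch, assuming a hypothetical Whisper deployment and endpoint:

```
import litellm

with open("speech.mp3", "rb") as audio:                      # hypothetical file
    response = litellm.transcription(
        model="azure/my-whisper-deployment",                  # hypothetical deployment name
        file=audio,
        api_base="https://my-endpoint.openai.azure.com",      # hypothetical endpoint
        api_version="2024-02-15-preview",                     # hypothetical API version
        api_key="sk-...",                                     # placeholder key
    )
print(response.text)
```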

View file

@@ -4,17 +4,7 @@ import os
import time
import types
import uuid
from typing import (
Any,
BinaryIO,
Callable,
Coroutine,
Iterable,
List,
Literal,
Optional,
Union,
)
from typing import Any, Callable, Coroutine, Iterable, List, Literal, Optional, Union
import httpx # type: ignore
import requests
@@ -27,6 +17,7 @@ from litellm import ImageResponse, OpenAIConfig
from litellm.caching import DualCache
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
from litellm.types.utils import FileTypes
from litellm.utils import (
Choices,
CustomStreamWrapper,
@@ -39,7 +30,7 @@ from litellm.utils import (
modify_url,
)
from ..types.llms.openai import (
from ...types.llms.openai import (
Assistant,
AssistantEventHandler,
AssistantStreamManager,
@@ -63,7 +54,7 @@ from ..types.llms.openai import (
SyncCursorPage,
Thread,
)
from .base import BaseLLM
from ..base import BaseLLM
azure_ad_cache = DualCache()
@@ -1571,178 +1562,6 @@ class AzureChatCompletion(BaseLLM):
else:
raise AzureOpenAIError(status_code=500, message=str(e))
def audio_transcriptions(
self,
model: str,
audio_file: BinaryIO,
optional_params: dict,
model_response: TranscriptionResponse,
timeout: float,
max_retries: int,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
api_version: Optional[str] = None,
client=None,
azure_ad_token: Optional[str] = None,
logging_obj=None,
atranscription: bool = False,
):
data = {"model": model, "file": audio_file, **optional_params}
# init AzureOpenAI Client
azure_client_params = {
"api_version": api_version,
"azure_endpoint": api_base,
"azure_deployment": model,
"timeout": timeout,
}
azure_client_params = select_azure_base_url_or_endpoint(
azure_client_params=azure_client_params
)
if api_key is not None:
azure_client_params["api_key"] = api_key
elif azure_ad_token is not None:
if azure_ad_token.startswith("oidc/"):
azure_ad_token = get_azure_ad_token_from_oidc(azure_ad_token)
azure_client_params["azure_ad_token"] = azure_ad_token
if max_retries is not None:
azure_client_params["max_retries"] = max_retries
if atranscription is True:
return self.async_audio_transcriptions(
audio_file=audio_file,
data=data,
model_response=model_response,
timeout=timeout,
api_key=api_key,
api_base=api_base,
client=client,
azure_client_params=azure_client_params,
max_retries=max_retries,
logging_obj=logging_obj,
)
if client is None:
azure_client = AzureOpenAI(http_client=litellm.client_session, **azure_client_params) # type: ignore
else:
azure_client = client
## LOGGING
logging_obj.pre_call(
input=f"audio_file_{uuid.uuid4()}",
api_key=azure_client.api_key,
additional_args={
"headers": {"Authorization": f"Bearer {azure_client.api_key}"},
"api_base": azure_client._base_url._uri_reference,
"atranscription": True,
"complete_input_dict": data,
},
)
response = azure_client.audio.transcriptions.create(
**data, timeout=timeout # type: ignore
)
if isinstance(response, BaseModel):
stringified_response = response.model_dump()
else:
stringified_response = TranscriptionResponse(text=response).model_dump()
## LOGGING
logging_obj.post_call(
input=audio_file.name,
api_key=api_key,
additional_args={"complete_input_dict": data},
original_response=stringified_response,
)
hidden_params = {"model": "whisper-1", "custom_llm_provider": "azure"}
final_response = convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, hidden_params=hidden_params, response_type="audio_transcription") # type: ignore
return final_response
async def async_audio_transcriptions(
self,
audio_file: BinaryIO,
data: dict,
model_response: TranscriptionResponse,
timeout: float,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
client=None,
azure_client_params=None,
max_retries=None,
logging_obj=None,
):
response = None
try:
if client is None:
async_azure_client = AsyncAzureOpenAI(
**azure_client_params,
http_client=litellm.aclient_session,
)
else:
async_azure_client = client
## LOGGING
logging_obj.pre_call(
input=f"audio_file_{uuid.uuid4()}",
api_key=async_azure_client.api_key,
additional_args={
"headers": {
"Authorization": f"Bearer {async_azure_client.api_key}"
},
"api_base": async_azure_client._base_url._uri_reference,
"atranscription": True,
"complete_input_dict": data,
},
)
raw_response = (
await async_azure_client.audio.transcriptions.with_raw_response.create(
**data, timeout=timeout
)
) # type: ignore
headers = dict(raw_response.headers)
response = raw_response.parse()
if isinstance(response, BaseModel):
stringified_response = response.model_dump()
else:
stringified_response = TranscriptionResponse(text=response).model_dump()
## LOGGING
logging_obj.post_call(
input=audio_file.name,
api_key=api_key,
additional_args={
"headers": {
"Authorization": f"Bearer {async_azure_client.api_key}"
},
"api_base": async_azure_client._base_url._uri_reference,
"atranscription": True,
"complete_input_dict": data,
},
original_response=stringified_response,
)
hidden_params = {"model": "whisper-1", "custom_llm_provider": "azure"}
response = convert_to_model_response_object(
_response_headers=headers,
response_object=stringified_response,
model_response_object=model_response,
hidden_params=hidden_params,
response_type="audio_transcription",
) # type: ignore
return response
except Exception as e:
## LOGGING
logging_obj.post_call(
input=input,
api_key=api_key,
original_response=str(e),
)
raise e
def audio_speech(
self,
model: str,
@@ -2473,7 +2292,7 @@ class AzureAssistantsAPI(BaseLLM):
"""
Here's an example:
```
from litellm.llms.openai import OpenAIAssistantsAPI, MessageData
from litellm.llms.OpenAI.openai import OpenAIAssistantsAPI, MessageData
# create thread
message: MessageData = {"role": "user", "content": "Hey, how's it going?"}

View file

@@ -0,0 +1,177 @@
from typing import Optional, Union
import httpx
from openai import AsyncOpenAI, OpenAI
from pydantic import BaseModel
import litellm
from litellm.litellm_core_utils.audio_utils.utils import get_audio_file_name
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.types.utils import FileTypes
from litellm.utils import TranscriptionResponse, convert_to_model_response_object
from .openai import OpenAIChatCompletion
class OpenAIAudioTranscription(OpenAIChatCompletion):
# Audio Transcriptions
async def make_openai_audio_transcriptions_request(
self,
openai_aclient: AsyncOpenAI,
data: dict,
timeout: Union[float, httpx.Timeout],
):
"""
Helper to:
- call openai_aclient.audio.transcriptions.with_raw_response when litellm.return_response_headers is True
- call openai_aclient.audio.transcriptions.create by default
"""
try:
if litellm.return_response_headers is True:
raw_response = (
await openai_aclient.audio.transcriptions.with_raw_response.create(
**data, timeout=timeout
)
) # type: ignore
headers = dict(raw_response.headers)
response = raw_response.parse()
return headers, response
else:
response = await openai_aclient.audio.transcriptions.create(**data, timeout=timeout) # type: ignore
return None, response
except Exception as e:
raise e
def make_sync_openai_audio_transcriptions_request(
self,
openai_client: OpenAI,
data: dict,
timeout: Union[float, httpx.Timeout],
):
"""
Helper to:
- call openai_aclient.audio.transcriptions.with_raw_response when litellm.return_response_headers is True
- call openai_aclient.audio.transcriptions.create by default
"""
try:
if litellm.return_response_headers is True:
raw_response = (
openai_client.audio.transcriptions.with_raw_response.create(
**data, timeout=timeout
)
) # type: ignore
headers = dict(raw_response.headers)
response = raw_response.parse()
return headers, response
else:
response = openai_client.audio.transcriptions.create(**data, timeout=timeout) # type: ignore
return None, response
except Exception as e:
raise e
def audio_transcriptions(
self,
model: str,
audio_file: FileTypes,
optional_params: dict,
model_response: TranscriptionResponse,
timeout: float,
max_retries: int,
api_key: Optional[str],
api_base: Optional[str],
client=None,
logging_obj=None,
atranscription: bool = False,
):
data = {"model": model, "file": audio_file, **optional_params}
if atranscription is True:
return self.async_audio_transcriptions(
audio_file=audio_file,
data=data,
model_response=model_response,
timeout=timeout,
api_key=api_key,
api_base=api_base,
client=client,
max_retries=max_retries,
logging_obj=logging_obj,
)
openai_client = self._get_openai_client(
is_async=False,
api_key=api_key,
api_base=api_base,
timeout=timeout,
max_retries=max_retries,
)
_, response = self.make_sync_openai_audio_transcriptions_request(
openai_client=openai_client,
data=data,
timeout=timeout,
)
if isinstance(response, BaseModel):
stringified_response = response.model_dump()
else:
stringified_response = TranscriptionResponse(text=response).model_dump()
## LOGGING
logging_obj.post_call(
input=get_audio_file_name(audio_file),
api_key=api_key,
additional_args={"complete_input_dict": data},
original_response=stringified_response,
)
hidden_params = {"model": "whisper-1", "custom_llm_provider": "openai"}
final_response = convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, hidden_params=hidden_params, response_type="audio_transcription") # type: ignore
return final_response
async def async_audio_transcriptions(
self,
audio_file: FileTypes,
data: dict,
model_response: TranscriptionResponse,
timeout: float,
logging_obj: LiteLLMLoggingObj,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
client=None,
max_retries=None,
):
try:
openai_aclient = self._get_openai_client(
is_async=True,
api_key=api_key,
api_base=api_base,
timeout=timeout,
max_retries=max_retries,
client=client,
)
headers, response = await self.make_openai_audio_transcriptions_request(
openai_aclient=openai_aclient,
data=data,
timeout=timeout,
)
logging_obj.model_call_details["response_headers"] = headers
if isinstance(response, BaseModel):
stringified_response = response.model_dump()
else:
stringified_response = TranscriptionResponse(text=response).model_dump()
## LOGGING
logging_obj.post_call(
input=get_audio_file_name(audio_file),
api_key=api_key,
additional_args={"complete_input_dict": data},
original_response=stringified_response,
)
hidden_params = {"model": "whisper-1", "custom_llm_provider": "openai"}
return convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, hidden_params=hidden_params, response_type="audio_transcription") # type: ignore
except Exception as e:
## LOGGING
logging_obj.post_call(
input=input,
api_key=api_key,
original_response=str(e),
)
raise e

View file

@@ -4,16 +4,7 @@ import os
import time
import traceback
import types
from typing import (
Any,
BinaryIO,
Callable,
Coroutine,
Iterable,
Literal,
Optional,
Union,
)
from typing import Any, Callable, Coroutine, Iterable, Literal, Optional, Union
import httpx
import openai
@@ -33,14 +24,13 @@ from litellm.utils import (
Message,
ModelResponse,
TextCompletionResponse,
TranscriptionResponse,
Usage,
convert_to_model_response_object,
)
from ..types.llms.openai import *
from .base import BaseLLM
from .prompt_templates.factory import custom_prompt, prompt_factory
from ...types.llms.openai import *
from ..base import BaseLLM
from ..prompt_templates.factory import custom_prompt, prompt_factory
class OpenAIError(Exception):
@@ -1608,168 +1598,6 @@ class OpenAIChatCompletion(BaseLLM):
else:
raise OpenAIError(status_code=500, message=str(e))
# Audio Transcriptions
async def make_openai_audio_transcriptions_request(
self,
openai_aclient: AsyncOpenAI,
data: dict,
timeout: Union[float, httpx.Timeout],
):
"""
Helper to:
- call openai_aclient.audio.transcriptions.with_raw_response when litellm.return_response_headers is True
- call openai_aclient.audio.transcriptions.create by default
"""
try:
if litellm.return_response_headers is True:
raw_response = (
await openai_aclient.audio.transcriptions.with_raw_response.create(
**data, timeout=timeout
)
) # type: ignore
headers = dict(raw_response.headers)
response = raw_response.parse()
return headers, response
else:
response = await openai_aclient.audio.transcriptions.create(**data, timeout=timeout) # type: ignore
return None, response
except Exception as e:
raise e
def make_sync_openai_audio_transcriptions_request(
self,
openai_client: OpenAI,
data: dict,
timeout: Union[float, httpx.Timeout],
):
"""
Helper to:
- call openai_aclient.audio.transcriptions.with_raw_response when litellm.return_response_headers is True
- call openai_aclient.audio.transcriptions.create by default
"""
try:
if litellm.return_response_headers is True:
raw_response = (
openai_client.audio.transcriptions.with_raw_response.create(
**data, timeout=timeout
)
) # type: ignore
headers = dict(raw_response.headers)
response = raw_response.parse()
return headers, response
else:
response = openai_client.audio.transcriptions.create(**data, timeout=timeout) # type: ignore
return None, response
except Exception as e:
raise e
def audio_transcriptions(
self,
model: str,
audio_file: BinaryIO,
optional_params: dict,
model_response: TranscriptionResponse,
timeout: float,
max_retries: int,
api_key: Optional[str],
api_base: Optional[str],
client=None,
logging_obj=None,
atranscription: bool = False,
):
data = {"model": model, "file": audio_file, **optional_params}
if atranscription is True:
return self.async_audio_transcriptions(
audio_file=audio_file,
data=data,
model_response=model_response,
timeout=timeout,
api_key=api_key,
api_base=api_base,
client=client,
max_retries=max_retries,
logging_obj=logging_obj,
)
openai_client = self._get_openai_client(
is_async=False,
api_key=api_key,
api_base=api_base,
timeout=timeout,
max_retries=max_retries,
)
_, response = self.make_sync_openai_audio_transcriptions_request(
openai_client=openai_client,
data=data,
timeout=timeout,
)
if isinstance(response, BaseModel):
stringified_response = response.model_dump()
else:
stringified_response = TranscriptionResponse(text=response).model_dump()
## LOGGING
logging_obj.post_call(
input=audio_file.name,
api_key=api_key,
additional_args={"complete_input_dict": data},
original_response=stringified_response,
)
hidden_params = {"model": "whisper-1", "custom_llm_provider": "openai"}
final_response = convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, hidden_params=hidden_params, response_type="audio_transcription") # type: ignore
return final_response
async def async_audio_transcriptions(
self,
audio_file: BinaryIO,
data: dict,
model_response: TranscriptionResponse,
timeout: float,
logging_obj: LiteLLMLoggingObj,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
client=None,
max_retries=None,
):
try:
openai_aclient = self._get_openai_client(
is_async=True,
api_key=api_key,
api_base=api_base,
timeout=timeout,
max_retries=max_retries,
client=client,
)
headers, response = await self.make_openai_audio_transcriptions_request(
openai_aclient=openai_aclient,
data=data,
timeout=timeout,
)
logging_obj.model_call_details["response_headers"] = headers
if isinstance(response, BaseModel):
stringified_response = response.model_dump()
else:
stringified_response = TranscriptionResponse(text=response).model_dump()
## LOGGING
logging_obj.post_call(
input=audio_file.name,
api_key=api_key,
additional_args={"complete_input_dict": data},
original_response=stringified_response,
)
hidden_params = {"model": "whisper-1", "custom_llm_provider": "openai"}
return convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, hidden_params=hidden_params, response_type="audio_transcription") # type: ignore
except Exception as e:
## LOGGING
logging_obj.post_call(
input=input,
api_key=api_key,
original_response=str(e),
)
raise e
def audio_speech(
self,
model: str,
@@ -3228,7 +3056,7 @@ class OpenAIAssistantsAPI(BaseLLM):
"""
Here's an example:
```
from litellm.llms.openai import OpenAIAssistantsAPI, MessageData
from litellm.llms.OpenAI.openai import OpenAIAssistantsAPI, MessageData
# create thread
message: MessageData = {"role": "user", "content": "Hey, how's it going?"}

View file

@@ -1,7 +1,7 @@
import json
import types # type: ignore
import uuid
from typing import Any, BinaryIO, Callable, Optional, Union
from typing import Any, Callable, Optional, Union
import httpx
import requests
@@ -19,8 +19,8 @@ from litellm.utils import (
convert_to_model_response_object,
)
from ..llms.openai import OpenAITextCompletion, OpenAITextCompletionConfig
from .base import BaseLLM
from .OpenAI.openai import OpenAITextCompletion, OpenAITextCompletionConfig
from .prompt_templates.factory import custom_prompt, prompt_factory
openai_text_completion_config = OpenAITextCompletionConfig()

View file

@@ -7,7 +7,7 @@ import litellm
from litellm.caching import Cache
from litellm.litellm_core_utils.litellm_logging import Logging
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
from litellm.llms.openai import AllMessageValues
from litellm.llms.OpenAI.openai import AllMessageValues
from litellm.types.llms.vertex_ai import (
CachedContentListAllResponseBody,
RequestBody,

View file

@@ -12,7 +12,7 @@ from litellm.llms.custom_httpx.http_handler import (
_get_async_httpx_client,
_get_httpx_client,
)
from litellm.llms.openai import HttpxBinaryResponseContent
from litellm.llms.OpenAI.openai import HttpxBinaryResponseContent
from litellm.llms.vertex_ai_and_google_ai_studio.gemini.vertex_and_google_ai_studio_gemini import (
VertexLLM,
)

View file

@@ -81,7 +81,7 @@ class VertexAIPartnerModels(BaseLLM):
from google.cloud import aiplatform
from litellm.llms.databricks import DatabricksChatCompletion
from litellm.llms.openai import OpenAIChatCompletion
from litellm.llms.OpenAI.openai import OpenAIChatCompletion
from litellm.llms.text_completion_codestral import CodestralTextCompletion
from litellm.llms.vertex_ai_and_google_ai_studio.gemini.vertex_and_google_ai_studio_gemini import (
VertexLLM,

View file

@@ -22,18 +22,7 @@ import uuid
from concurrent.futures import ThreadPoolExecutor
from copy import deepcopy
from functools import partial
from typing import (
Any,
BinaryIO,
Callable,
Dict,
List,
Literal,
Mapping,
Optional,
Type,
Union,
)
from typing import Any, Callable, Dict, List, Literal, Mapping, Optional, Type, Union
import dotenv
import httpx
@@ -93,8 +82,9 @@ from .llms import (
from .llms.AI21 import completion as ai21
from .llms.anthropic.chat import AnthropicChatCompletion
from .llms.anthropic.completion import AnthropicTextCompletion
from .llms.azure import AzureChatCompletion, _check_dynamic_azure_params
from .llms.azure_text import AzureTextCompletion
from .llms.AzureOpenAI.audio_transcriptions import AzureAudioTranscription
from .llms.AzureOpenAI.azure import AzureChatCompletion, _check_dynamic_azure_params
from .llms.bedrock import image_generation as bedrock_image_generation # type: ignore
from .llms.bedrock.chat import BedrockConverseLLM, BedrockLLM
from .llms.bedrock.embed.embedding import BedrockEmbedding
@@ -104,7 +94,8 @@ from .llms.cohere import embed as cohere_embed
from .llms.custom_llm import CustomLLM, custom_chat_llm_router
from .llms.databricks import DatabricksChatCompletion
from .llms.huggingface_restapi import Huggingface
from .llms.openai import OpenAIChatCompletion, OpenAITextCompletion
from .llms.OpenAI.audio_transcriptions import OpenAIAudioTranscription
from .llms.OpenAI.openai import OpenAIChatCompletion, OpenAITextCompletion
from .llms.predibase import PredibaseChatCompletion
from .llms.prompt_templates.factory import (
custom_prompt,
@@ -146,6 +137,7 @@ from .types.llms.openai import HttpxBinaryResponseContent
from .types.utils import (
AdapterCompletionStreamWrapper,
ChatCompletionMessageToolCall,
FileTypes,
HiddenParams,
all_litellm_params,
)
@@ -169,11 +161,13 @@ from litellm.utils import (
####### ENVIRONMENT VARIABLES ###################
openai_chat_completions = OpenAIChatCompletion()
openai_text_completions = OpenAITextCompletion()
openai_audio_transcriptions = OpenAIAudioTranscription()
databricks_chat_completions = DatabricksChatCompletion()
anthropic_chat_completions = AnthropicChatCompletion()
anthropic_text_completions = AnthropicTextCompletion()
azure_chat_completions = AzureChatCompletion()
azure_text_completions = AzureTextCompletion()
azure_audio_transcriptions = AzureAudioTranscription()
huggingface = Huggingface()
predibase_chat_completions = PredibaseChatCompletion()
codestral_text_completions = CodestralTextCompletion()
@@ -4614,7 +4608,7 @@ async def atranscription(*args, **kwargs) -> TranscriptionResponse:
@client
def transcription(
model: str,
file: BinaryIO,
file: FileTypes,
## OPTIONAL OPENAI PARAMS ##
language: Optional[str] = None,
prompt: Optional[str] = None,
@@ -4704,7 +4698,7 @@ def transcription(
or get_secret("AZURE_API_KEY")
) # type: ignore
response = azure_chat_completions.audio_transcriptions(
response = azure_audio_transcriptions.audio_transcriptions(
model=model,
audio_file=file,
optional_params=optional_params,
@@ -4738,7 +4732,7 @@ def transcription(
or litellm.openai_key
or get_secret("OPENAI_API_KEY")
) # type: ignore
response = openai_chat_completions.audio_transcriptions(
response = openai_audio_transcriptions.audio_transcriptions(
model=model,
audio_file=file,
optional_params=optional_params,
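
The user-facing effect of the `file: FileTypes` annotation is that `litellm.transcription()` / `litellm.atranscription()` accept the same file inputs as the OpenAI SDK, roughly bytes, a binary file object, an `os.PathLike`, or a `(filename, content)` tuple. A minimal sketch, assuming an OpenAI key is set in the environment and using hypothetical file names:

```
import litellm

# a binary file object, as before
with open("speech.mp3", "rb") as audio:
    response = litellm.transcription(model="whisper-1", file=audio)

# a (filename, bytes) tuple also satisfies FileTypes
audio_bytes = open("speech.mp3", "rb").read()
response = litellm.transcription(model="whisper-1", file=("speech.mp3", audio_bytes))

print(response.text)
```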

View file

@@ -24,18 +24,7 @@ import traceback
import uuid
from collections import defaultdict
from datetime import datetime
from typing import (
Any,
BinaryIO,
Dict,
Iterable,
List,
Literal,
Optional,
Tuple,
TypedDict,
Union,
)
from typing import Any, Dict, Iterable, List, Literal, Optional, Tuple, TypedDict, Union
import httpx
import openai
@@ -48,7 +37,7 @@ from litellm.assistants.main import AssistantDeleted
from litellm.caching import DualCache, InMemoryCache, RedisCache
from litellm.integrations.custom_logger import CustomLogger
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLogging
from litellm.llms.azure import get_azure_ad_token_from_oidc
from litellm.llms.AzureOpenAI.azure import get_azure_ad_token_from_oidc
from litellm.router_strategy.least_busy import LeastBusyLoggingHandler
from litellm.router_strategy.lowest_cost import LowestCostLoggingHandler
from litellm.router_strategy.lowest_latency import LowestLatencyLoggingHandler
@@ -1342,7 +1331,7 @@ class Router:
self.fail_calls[model_name] += 1
raise e
async def atranscription(self, file: BinaryIO, model: str, **kwargs):
async def atranscription(self, file: FileTypes, model: str, **kwargs):
"""
Example Usage:
@@ -1386,7 +1375,7 @@
)
raise e
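
The Router's `atranscription` now carries the same `FileTypes` annotation, so routed calls accept the same inputs. A hedged sketch with a hypothetical deployment:

```
import asyncio

from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "whisper",
            "litellm_params": {"model": "whisper-1"},  # hypothetical deployment
        }
    ]
)

async def main():
    with open("speech.mp3", "rb") as audio:            # hypothetical file
        response = await router.atranscription(model="whisper", file=audio)
    print(response.text)

asyncio.run(main())
```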
async def _atranscription(self, file: BinaryIO, model: str, **kwargs):
async def _atranscription(self, file: FileTypes, model: str, **kwargs):
try:
verbose_router_logger.debug(
f"Inside _atranscription()- model: {model}; kwargs: {kwargs}"

View file

@@ -8,7 +8,7 @@ import openai
import litellm
from litellm._logging import verbose_router_logger
from litellm.llms.azure import get_azure_ad_token_from_oidc
from litellm.llms.AzureOpenAI.azure import get_azure_ad_token_from_oidc
from litellm.secret_managers.get_azure_ad_token_provider import (
get_azure_ad_token_provider,
)
@@ -337,7 +337,9 @@ def set_client(litellm_router_instance: LitellmRouter, model: dict):
azure_client_params["azure_ad_token_provider"] = (
azure_ad_token_provider
)
from litellm.llms.azure import select_azure_base_url_or_endpoint
from litellm.llms.AzureOpenAI.azure import (
select_azure_base_url_or_endpoint,
)
# this decides if we should set azure_endpoint or base_url on Azure OpenAI Client
# required to support GPT-4 vision enhancements, since base_url needs to be set on Azure OpenAI Client

View file

@@ -20,15 +20,15 @@ from typing_extensions import override
import litellm
from litellm import create_thread, get_thread
from litellm.llms.openai import (
from litellm.llms.OpenAI.openai import (
AssistantEventHandler,
AsyncAssistantEventHandler,
AsyncCursorPage,
MessageData,
OpenAIAssistantsAPI,
)
from litellm.llms.openai import OpenAIMessage as Message
from litellm.llms.openai import SyncCursorPage, Thread
from litellm.llms.OpenAI.openai import OpenAIMessage as Message
from litellm.llms.OpenAI.openai import SyncCursorPage, Thread
"""
V0 Scope:

View file

@@ -60,7 +60,7 @@ async def test_audio_speech_litellm(sync_mode, model, api_base, api_key):
optional_params={},
)
from litellm.llms.openai import HttpxBinaryResponseContent
from litellm.llms.OpenAI.openai import HttpxBinaryResponseContent
assert isinstance(response, HttpxBinaryResponseContent)
else:
@@ -78,7 +78,7 @@ async def test_audio_speech_litellm(sync_mode, model, api_base, api_key):
optional_params={},
)
from litellm.llms.openai import HttpxBinaryResponseContent
from litellm.llms.OpenAI.openai import HttpxBinaryResponseContent
assert isinstance(response, HttpxBinaryResponseContent)
@@ -115,7 +115,7 @@ async def test_audio_speech_router(mode):
optional_params={},
)
from litellm.llms.openai import HttpxBinaryResponseContent
from litellm.llms.OpenAI.openai import HttpxBinaryResponseContent
assert isinstance(response, HttpxBinaryResponseContent)
@@ -146,7 +146,7 @@ async def test_audio_speech_litellm_vertex(sync_mode):
from types import SimpleNamespace
from litellm.llms.openai import HttpxBinaryResponseContent
from litellm.llms.OpenAI.openai import HttpxBinaryResponseContent
response.stream_to_file(speech_file_path)

View file

@@ -16,7 +16,7 @@ sys.path.insert(
) # Adds the parent directory to the system path
import pytest
from litellm.llms.azure import get_azure_ad_token_from_oidc
from litellm.llms.AzureOpenAI.azure import get_azure_ad_token_from_oidc
from litellm.llms.bedrock.chat import BedrockConverseLLM, BedrockLLM
from litellm.secret_managers.aws_secret_manager import load_aws_secret_manager
from litellm.secret_managers.main import get_secret

View file

@@ -20,7 +20,7 @@ from litellm.utils import ModelResponseListIterator
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
) # Adds the parent directory to the system-path
from dotenv import load_dotenv
load_dotenv()

View file

@@ -1,16 +1,5 @@
from os import PathLike
from typing import (
IO,
Any,
BinaryIO,
Iterable,
List,
Literal,
Mapping,
Optional,
Tuple,
Union,
)
from typing import IO, Any, Iterable, List, Literal, Mapping, Optional, Tuple, Union
from openai._legacy_response import HttpxBinaryResponseContent
from openai.lib.streaming._assistants import (

View file

@@ -5,6 +5,7 @@ from enum import Enum
from typing import Any, Dict, List, Literal, Optional, Tuple, Union
from openai._models import BaseModel as OpenAIObject
from openai.types.audio.transcription_create_params import FileTypes
from openai.types.completion_usage import CompletionUsage
from pydantic import ConfigDict, Field, PrivateAttr
from typing_extensions import Callable, Dict, Required, TypedDict, override

View file

@@ -55,12 +55,10 @@ from tokenizers import Tokenizer
import litellm
import litellm._service_logger # for storing API inputs, outputs, and metadata
import litellm.litellm_core_utils
import litellm.litellm_core_utils.audio_utils.utils
import litellm.litellm_core_utils.json_validation_rule
from litellm.caching import DualCache
from litellm.litellm_core_utils.core_helpers import (
get_file_check_sum,
map_finish_reason,
)
from litellm.litellm_core_utils.core_helpers import map_finish_reason
from litellm.litellm_core_utils.exception_mapping_utils import get_error_message
from litellm.litellm_core_utils.get_llm_provider_logic import (
_is_non_openai_azure_model,
@@ -86,6 +84,7 @@ from litellm.types.utils import (
Delta,
Embedding,
EmbeddingResponse,
FileTypes,
ImageResponse,
Message,
ModelInfo,
@@ -161,7 +160,6 @@ from concurrent.futures import ThreadPoolExecutor
from concurrent.futures import ThreadPoolExecutor
from typing import (
Any,
BinaryIO,
Callable,
Dict,
Iterable,
@@ -566,14 +564,17 @@ def function_setup(
call_type == CallTypes.atranscription.value
or call_type == CallTypes.transcription.value
):
_file_name: BinaryIO = args[1] if len(args) > 1 else kwargs["file"]
file_checksum = get_file_check_sum(_file=_file_name)
file_name = _file_name.name
_file_obj: FileTypes = args[1] if len(args) > 1 else kwargs["file"]
file_checksum = (
litellm.litellm_core_utils.audio_utils.utils.get_audio_file_name(
file_obj=_file_obj
)
)
if "metadata" in kwargs:
kwargs["metadata"]["file_checksum"] = file_checksum
else:
kwargs["metadata"] = {"file_checksum": file_checksum}
messages = file_name
messages = file_checksum
elif (
call_type == CallTypes.aspeech.value or call_type == CallTypes.speech.value
):
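
For context, the pre-call setup now derives the transcription cache/logging key from `get_audio_file_name` instead of the removed `get_file_check_sum`, so it no longer depends on the file exposing `.fileno()` or `.name`. A rough illustration of the new key derivation (not the full `function_setup` logic; the kwargs are hypothetical):

```
from litellm.litellm_core_utils.audio_utils.utils import get_audio_file_name

kwargs = {"file": ("speech.mp3", b"..."), "metadata": {}}    # hypothetical call kwargs
file_checksum = get_audio_file_name(file_obj=kwargs["file"])
kwargs["metadata"]["file_checksum"] = file_checksum          # used as the cache/logging key
messages = file_checksum                                     # also recorded as the call "input"
```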