diff --git a/litellm/batches/main.py b/litellm/batches/main.py
index e927a18b6..cd81cc44f 100644
--- a/litellm/batches/main.py
+++ b/litellm/batches/main.py
@@ -20,8 +20,8 @@ import httpx
 
 import litellm
 from litellm import client
-from litellm.llms.azure import AzureBatchesAPI
-from litellm.llms.openai import OpenAIBatchesAPI
+from litellm.llms.AzureOpenAI.azure import AzureBatchesAPI
+from litellm.llms.OpenAI.openai import OpenAIBatchesAPI
 from litellm.secret_managers.main import get_secret
 from litellm.types.llms.openai import (
     Batch,
diff --git a/litellm/caching.py b/litellm/caching.py
index 13da3cb1e..7f67ee455 100644
--- a/litellm/caching.py
+++ b/litellm/caching.py
@@ -17,7 +17,7 @@ import time
 import traceback
 from datetime import timedelta
 from enum import Enum
-from typing import Any, BinaryIO, List, Literal, Optional, Union
+from typing import Any, List, Literal, Optional, Union
 
 from openai._models import BaseModel as OpenAIObject
 
diff --git a/litellm/litellm_core_utils/core_helpers.py b/litellm/litellm_core_utils/core_helpers.py
index a9e535316..9f5075c22 100644
--- a/litellm/litellm_core_utils/core_helpers.py
+++ b/litellm/litellm_core_utils/core_helpers.py
@@ -1,9 +1,10 @@
 # What is this?
 ## Helper utilities
 import os
-from typing import BinaryIO, List, Literal, Optional, Tuple
+from typing import List, Literal, Optional, Tuple
 
 from litellm._logging import verbose_logger
+from litellm.types.utils import FileTypes
 
 
 def map_finish_reason(
@@ -88,18 +89,19 @@ def _get_parent_otel_span_from_kwargs(kwargs: Optional[dict] = None):
     return None
 
 
-def get_file_check_sum(_file: BinaryIO):
+def get_audio_file_name(file_obj: FileTypes) -> str:
     """
-    Helper to safely get file checksum - used as a cache key
+    Safely get the name of a file-like object or return its string representation.
+
+    Args:
+        file_obj (FileTypes): A file-like object or any other object.
+
+    Returns:
+        str: The name of the file if available, otherwise a string representation of the object.
""" - try: - file_descriptor = _file.fileno() - file_stat = os.fstat(file_descriptor) - file_size = str(file_stat.st_size) - file_checksum = _file.name + file_size - return file_checksum - except Exception as e: - verbose_logger.error(f"Error getting file_checksum: {(str(e))}") - file_checksum = _file.name - return file_checksum - return file_checksum + if hasattr(file_obj, "name"): + return getattr(file_obj, "name") + elif hasattr(file_obj, "__str__"): + return str(file_obj) + else: + return repr(file_obj) diff --git a/litellm/llms/AzureOpenAI/audio_transcriptions.py b/litellm/llms/AzureOpenAI/audio_transcriptions.py new file mode 100644 index 000000000..db373797a --- /dev/null +++ b/litellm/llms/AzureOpenAI/audio_transcriptions.py @@ -0,0 +1,192 @@ +import uuid +from typing import Optional, Union + +import httpx +from openai import AsyncAzureOpenAI, AzureOpenAI +from pydantic import BaseModel + +import litellm +from litellm.litellm_core_utils.core_helpers import get_audio_file_name +from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj +from litellm.types.utils import FileTypes +from litellm.utils import TranscriptionResponse, convert_to_model_response_object + +from .azure import ( + AzureChatCompletion, + get_azure_ad_token_from_oidc, + select_azure_base_url_or_endpoint, +) + + +class AzureAudioTranscription(AzureChatCompletion): + def audio_transcriptions( + self, + model: str, + audio_file: FileTypes, + optional_params: dict, + model_response: TranscriptionResponse, + timeout: float, + max_retries: int, + api_key: Optional[str] = None, + api_base: Optional[str] = None, + api_version: Optional[str] = None, + client=None, + azure_ad_token: Optional[str] = None, + logging_obj=None, + atranscription: bool = False, + ): + data = {"model": model, "file": audio_file, **optional_params} + + # init AzureOpenAI Client + azure_client_params = { + "api_version": api_version, + "azure_endpoint": api_base, + "azure_deployment": model, + "timeout": timeout, + } + + azure_client_params = select_azure_base_url_or_endpoint( + azure_client_params=azure_client_params + ) + if api_key is not None: + azure_client_params["api_key"] = api_key + elif azure_ad_token is not None: + if azure_ad_token.startswith("oidc/"): + azure_ad_token = get_azure_ad_token_from_oidc(azure_ad_token) + azure_client_params["azure_ad_token"] = azure_ad_token + + if max_retries is not None: + azure_client_params["max_retries"] = max_retries + + if atranscription is True: + return self.async_audio_transcriptions( + audio_file=audio_file, + data=data, + model_response=model_response, + timeout=timeout, + api_key=api_key, + api_base=api_base, + client=client, + azure_client_params=azure_client_params, + max_retries=max_retries, + logging_obj=logging_obj, + ) + if client is None: + azure_client = AzureOpenAI(http_client=litellm.client_session, **azure_client_params) # type: ignore + else: + azure_client = client + + ## LOGGING + logging_obj.pre_call( + input=f"audio_file_{uuid.uuid4()}", + api_key=azure_client.api_key, + additional_args={ + "headers": {"Authorization": f"Bearer {azure_client.api_key}"}, + "api_base": azure_client._base_url._uri_reference, + "atranscription": True, + "complete_input_dict": data, + }, + ) + + response = azure_client.audio.transcriptions.create( + **data, timeout=timeout # type: ignore + ) + + if isinstance(response, BaseModel): + stringified_response = response.model_dump() + else: + stringified_response = TranscriptionResponse(text=response).model_dump() + + ## LOGGING + 
+        logging_obj.post_call(
+            input=get_audio_file_name(audio_file),
+            api_key=api_key,
+            additional_args={"complete_input_dict": data},
+            original_response=stringified_response,
+        )
+        hidden_params = {"model": "whisper-1", "custom_llm_provider": "azure"}
+        final_response = convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, hidden_params=hidden_params, response_type="audio_transcription")  # type: ignore
+        return final_response
+
+    async def async_audio_transcriptions(
+        self,
+        audio_file: FileTypes,
+        data: dict,
+        model_response: TranscriptionResponse,
+        timeout: float,
+        api_key: Optional[str] = None,
+        api_base: Optional[str] = None,
+        client=None,
+        azure_client_params=None,
+        max_retries=None,
+        logging_obj=None,
+    ):
+        response = None
+        try:
+            if client is None:
+                async_azure_client = AsyncAzureOpenAI(
+                    **azure_client_params,
+                    http_client=litellm.aclient_session,
+                )
+            else:
+                async_azure_client = client
+
+            ## LOGGING
+            logging_obj.pre_call(
+                input=f"audio_file_{uuid.uuid4()}",
+                api_key=async_azure_client.api_key,
+                additional_args={
+                    "headers": {
+                        "Authorization": f"Bearer {async_azure_client.api_key}"
+                    },
+                    "api_base": async_azure_client._base_url._uri_reference,
+                    "atranscription": True,
+                    "complete_input_dict": data,
+                },
+            )
+
+            raw_response = (
+                await async_azure_client.audio.transcriptions.with_raw_response.create(
+                    **data, timeout=timeout
+                )
+            )  # type: ignore
+
+            headers = dict(raw_response.headers)
+            response = raw_response.parse()
+
+            if isinstance(response, BaseModel):
+                stringified_response = response.model_dump()
+            else:
+                stringified_response = TranscriptionResponse(text=response).model_dump()
+
+            ## LOGGING
+            logging_obj.post_call(
+                input=get_audio_file_name(audio_file),
+                api_key=api_key,
+                additional_args={
+                    "headers": {
+                        "Authorization": f"Bearer {async_azure_client.api_key}"
+                    },
+                    "api_base": async_azure_client._base_url._uri_reference,
+                    "atranscription": True,
+                    "complete_input_dict": data,
+                },
+                original_response=stringified_response,
+            )
+            hidden_params = {"model": "whisper-1", "custom_llm_provider": "azure"}
+            response = convert_to_model_response_object(
+                _response_headers=headers,
+                response_object=stringified_response,
+                model_response_object=model_response,
+                hidden_params=hidden_params,
+                response_type="audio_transcription",
+            )  # type: ignore
+            return response
+        except Exception as e:
+            ## LOGGING
+            logging_obj.post_call(
+                input=get_audio_file_name(audio_file),
+                api_key=api_key,
+                original_response=str(e),
+            )
+            raise e
diff --git a/litellm/llms/azure.py b/litellm/llms/AzureOpenAI/azure.py
similarity index 94%
rename from litellm/llms/azure.py
rename to litellm/llms/AzureOpenAI/azure.py
index 222961e10..a14644e18 100644
--- a/litellm/llms/azure.py
+++ b/litellm/llms/AzureOpenAI/azure.py
@@ -4,17 +4,7 @@ import os
 import time
 import types
 import uuid
-from typing import (
-    Any,
-    BinaryIO,
-    Callable,
-    Coroutine,
-    Iterable,
-    List,
-    Literal,
-    Optional,
-    Union,
-)
+from typing import Any, Callable, Coroutine, Iterable, List, Literal, Optional, Union
 
 import httpx  # type: ignore
 import requests
@@ -27,6 +17,7 @@ from litellm import ImageResponse, OpenAIConfig
 from litellm.caching import DualCache
 from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
+from litellm.types.utils import FileTypes
 from litellm.utils import (
     Choices,
     CustomStreamWrapper,
@@ -39,7 +30,7 @@
     modify_url,
 )
 
-from ..types.llms.openai import (
+from ...types.llms.openai import (
     Assistant,
     AssistantEventHandler,
     AssistantStreamManager,
@@ -63,7 +54,7 @@
     SyncCursorPage,
     Thread,
 )
-from .base import BaseLLM
+from ..base import BaseLLM
 
 
 azure_ad_cache = DualCache()
@@ -1570,178 +1561,6 @@ class AzureChatCompletion(BaseLLM):
         else:
             raise AzureOpenAIError(status_code=500, message=str(e))
 
-    def audio_transcriptions(
-        self,
-        model: str,
-        audio_file: BinaryIO,
-        optional_params: dict,
-        model_response: TranscriptionResponse,
-        timeout: float,
-        max_retries: int,
-        api_key: Optional[str] = None,
-        api_base: Optional[str] = None,
-        api_version: Optional[str] = None,
-        client=None,
-        azure_ad_token: Optional[str] = None,
-        logging_obj=None,
-        atranscription: bool = False,
-    ):
-        data = {"model": model, "file": audio_file, **optional_params}
-
-        # init AzureOpenAI Client
-        azure_client_params = {
-            "api_version": api_version,
-            "azure_endpoint": api_base,
-            "azure_deployment": model,
-            "timeout": timeout,
-        }
-
-        azure_client_params = select_azure_base_url_or_endpoint(
-            azure_client_params=azure_client_params
-        )
-        if api_key is not None:
-            azure_client_params["api_key"] = api_key
-        elif azure_ad_token is not None:
-            if azure_ad_token.startswith("oidc/"):
-                azure_ad_token = get_azure_ad_token_from_oidc(azure_ad_token)
-            azure_client_params["azure_ad_token"] = azure_ad_token
-
-        if max_retries is not None:
-            azure_client_params["max_retries"] = max_retries
-
-        if atranscription is True:
-            return self.async_audio_transcriptions(
-                audio_file=audio_file,
-                data=data,
-                model_response=model_response,
-                timeout=timeout,
-                api_key=api_key,
-                api_base=api_base,
-                client=client,
-                azure_client_params=azure_client_params,
-                max_retries=max_retries,
-                logging_obj=logging_obj,
-            )
-        if client is None:
-            azure_client = AzureOpenAI(http_client=litellm.client_session, **azure_client_params)  # type: ignore
-        else:
-            azure_client = client
-
-        ## LOGGING
-        logging_obj.pre_call(
-            input=f"audio_file_{uuid.uuid4()}",
-            api_key=azure_client.api_key,
-            additional_args={
-                "headers": {"Authorization": f"Bearer {azure_client.api_key}"},
-                "api_base": azure_client._base_url._uri_reference,
-                "atranscription": True,
-                "complete_input_dict": data,
-            },
-        )
-
-        response = azure_client.audio.transcriptions.create(
-            **data, timeout=timeout  # type: ignore
-        )
-
-        if isinstance(response, BaseModel):
-            stringified_response = response.model_dump()
-        else:
-            stringified_response = TranscriptionResponse(text=response).model_dump()
-
-        ## LOGGING
-        logging_obj.post_call(
-            input=audio_file.name,
-            api_key=api_key,
-            additional_args={"complete_input_dict": data},
-            original_response=stringified_response,
-        )
-        hidden_params = {"model": "whisper-1", "custom_llm_provider": "azure"}
-        final_response = convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, hidden_params=hidden_params, response_type="audio_transcription")  # type: ignore
-        return final_response
-
-    async def async_audio_transcriptions(
-        self,
-        audio_file: BinaryIO,
-        data: dict,
-        model_response: TranscriptionResponse,
-        timeout: float,
-        api_key: Optional[str] = None,
-        api_base: Optional[str] = None,
-        client=None,
-        azure_client_params=None,
-        max_retries=None,
-        logging_obj=None,
-    ):
-        response = None
-        try:
-            if client is None:
-                async_azure_client = AsyncAzureOpenAI(
-                    **azure_client_params,
-                    http_client=litellm.aclient_session,
-                )
-            else:
-                async_azure_client = client
-
-            ## LOGGING
-            logging_obj.pre_call(
-                input=f"audio_file_{uuid.uuid4()}",
-                api_key=async_azure_client.api_key,
-                additional_args={
-                    "headers": {
-                        "Authorization": f"Bearer {async_azure_client.api_key}"
-                    },
-                    "api_base": async_azure_client._base_url._uri_reference,
-                    "atranscription": True,
-                    "complete_input_dict": data,
-                },
-            )
-
-            raw_response = (
-                await async_azure_client.audio.transcriptions.with_raw_response.create(
-                    **data, timeout=timeout
-                )
-            )  # type: ignore
-
-            headers = dict(raw_response.headers)
-            response = raw_response.parse()
-
-            if isinstance(response, BaseModel):
-                stringified_response = response.model_dump()
-            else:
-                stringified_response = TranscriptionResponse(text=response).model_dump()
-
-            ## LOGGING
-            logging_obj.post_call(
-                input=audio_file.name,
-                api_key=api_key,
-                additional_args={
-                    "headers": {
-                        "Authorization": f"Bearer {async_azure_client.api_key}"
-                    },
-                    "api_base": async_azure_client._base_url._uri_reference,
-                    "atranscription": True,
-                    "complete_input_dict": data,
-                },
-                original_response=stringified_response,
-            )
-            hidden_params = {"model": "whisper-1", "custom_llm_provider": "azure"}
-            response = convert_to_model_response_object(
-                _response_headers=headers,
-                response_object=stringified_response,
-                model_response_object=model_response,
-                hidden_params=hidden_params,
-                response_type="audio_transcription",
-            )  # type: ignore
-            return response
-        except Exception as e:
-            ## LOGGING
-            logging_obj.post_call(
-                input=input,
-                api_key=api_key,
-                original_response=str(e),
-            )
-            raise e
-
     def audio_speech(
         self,
         model: str,
diff --git a/litellm/llms/OpenAI/audio_transcriptions.py b/litellm/llms/OpenAI/audio_transcriptions.py
new file mode 100644
index 000000000..587ee471e
--- /dev/null
+++ b/litellm/llms/OpenAI/audio_transcriptions.py
@@ -0,0 +1,177 @@
+from typing import Optional, Union
+
+import httpx
+from openai import AsyncOpenAI, OpenAI
+from pydantic import BaseModel
+
+import litellm
+from litellm.litellm_core_utils.core_helpers import get_audio_file_name
+from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
+from litellm.types.utils import FileTypes
+from litellm.utils import TranscriptionResponse, convert_to_model_response_object
+
+from .openai import OpenAIChatCompletion
+
+
+class OpenAIAudioTranscription(OpenAIChatCompletion):
+    # Audio Transcriptions
+    async def make_openai_audio_transcriptions_request(
+        self,
+        openai_aclient: AsyncOpenAI,
+        data: dict,
+        timeout: Union[float, httpx.Timeout],
+    ):
+        """
+        Helper to:
+        - call openai_aclient.audio.transcriptions.with_raw_response when litellm.return_response_headers is True
+        - call openai_aclient.audio.transcriptions.create by default
+        """
+        try:
+            if litellm.return_response_headers is True:
+                raw_response = (
+                    await openai_aclient.audio.transcriptions.with_raw_response.create(
+                        **data, timeout=timeout
+                    )
+                )  # type: ignore
+                headers = dict(raw_response.headers)
+                response = raw_response.parse()
+                return headers, response
+            else:
+                response = await openai_aclient.audio.transcriptions.create(**data, timeout=timeout)  # type: ignore
+                return None, response
+        except Exception as e:
+            raise e
+
+    def make_sync_openai_audio_transcriptions_request(
+        self,
+        openai_client: OpenAI,
+        data: dict,
+        timeout: Union[float, httpx.Timeout],
+    ):
+        """
+        Helper to:
+        - call openai_client.audio.transcriptions.with_raw_response when litellm.return_response_headers is True
+        - call openai_client.audio.transcriptions.create by default
+        """
+        try:
+            if litellm.return_response_headers is True:
+                raw_response = (
+                    openai_client.audio.transcriptions.with_raw_response.create(
+                        **data, timeout=timeout
+                    )
+                )  # type: ignore
+                headers = dict(raw_response.headers)
+                response = raw_response.parse()
+                return headers, response
+            else:
+                response = openai_client.audio.transcriptions.create(**data, timeout=timeout)  # type: ignore
+                return None, response
+        except Exception as e:
+            raise e
+
+    def audio_transcriptions(
+        self,
+        model: str,
+        audio_file: FileTypes,
+        optional_params: dict,
+        model_response: TranscriptionResponse,
+        timeout: float,
+        max_retries: int,
+        api_key: Optional[str],
+        api_base: Optional[str],
+        client=None,
+        logging_obj=None,
+        atranscription: bool = False,
+    ):
+        data = {"model": model, "file": audio_file, **optional_params}
+        if atranscription is True:
+            return self.async_audio_transcriptions(
+                audio_file=audio_file,
+                data=data,
+                model_response=model_response,
+                timeout=timeout,
+                api_key=api_key,
+                api_base=api_base,
+                client=client,
+                max_retries=max_retries,
+                logging_obj=logging_obj,
+            )
+
+        openai_client = self._get_openai_client(
+            is_async=False,
+            api_key=api_key,
+            api_base=api_base,
+            timeout=timeout,
+            max_retries=max_retries,
+        )
+        _, response = self.make_sync_openai_audio_transcriptions_request(
+            openai_client=openai_client,
+            data=data,
+            timeout=timeout,
+        )
+
+        if isinstance(response, BaseModel):
+            stringified_response = response.model_dump()
+        else:
+            stringified_response = TranscriptionResponse(text=response).model_dump()
+
+        ## LOGGING
+        logging_obj.post_call(
+            input=get_audio_file_name(audio_file),
+            api_key=api_key,
+            additional_args={"complete_input_dict": data},
+            original_response=stringified_response,
+        )
+        hidden_params = {"model": "whisper-1", "custom_llm_provider": "openai"}
+        final_response = convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, hidden_params=hidden_params, response_type="audio_transcription")  # type: ignore
+        return final_response
+
+    async def async_audio_transcriptions(
+        self,
+        audio_file: FileTypes,
+        data: dict,
+        model_response: TranscriptionResponse,
+        timeout: float,
+        logging_obj: LiteLLMLoggingObj,
+        api_key: Optional[str] = None,
+        api_base: Optional[str] = None,
+        client=None,
+        max_retries=None,
+    ):
+        try:
+            openai_aclient = self._get_openai_client(
+                is_async=True,
+                api_key=api_key,
+                api_base=api_base,
+                timeout=timeout,
+                max_retries=max_retries,
+                client=client,
+            )
+
+            headers, response = await self.make_openai_audio_transcriptions_request(
+                openai_aclient=openai_aclient,
+                data=data,
+                timeout=timeout,
+            )
+            logging_obj.model_call_details["response_headers"] = headers
+            if isinstance(response, BaseModel):
+                stringified_response = response.model_dump()
+            else:
+                stringified_response = TranscriptionResponse(text=response).model_dump()
+            ## LOGGING
+            logging_obj.post_call(
+                input=get_audio_file_name(audio_file),
+                api_key=api_key,
+                additional_args={"complete_input_dict": data},
+                original_response=stringified_response,
+            )
+            hidden_params = {"model": "whisper-1", "custom_llm_provider": "openai"}
+            return convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, hidden_params=hidden_params, response_type="audio_transcription")  # type: ignore
+        except Exception as e:
+            ## LOGGING
+            logging_obj.post_call(
+                input=get_audio_file_name(audio_file),
+                api_key=api_key,
+                original_response=str(e),
+            )
+            raise e
diff --git a/litellm/llms/openai.py b/litellm/llms/OpenAI/openai.py
similarity index 95%
rename from litellm/llms/openai.py
rename to litellm/llms/OpenAI/openai.py
index e7a10c5cd..8d112f52a 100644
--- a/litellm/llms/openai.py
+++ b/litellm/llms/OpenAI/openai.py
@@ -4,16 +4,7 @@ import os
 import time
 import traceback
 import types
-from typing import (
-    Any,
-    BinaryIO,
-    Callable,
-    Coroutine,
-    Iterable,
-    Literal,
-    Optional,
-    Union,
-)
+from typing import Any, Callable, Coroutine, Iterable, Literal, Optional, Union
 
 import httpx
 import openai
@@ -33,14 +24,13 @@ from litellm.utils import (
     Message,
     ModelResponse,
     TextCompletionResponse,
-    TranscriptionResponse,
     Usage,
     convert_to_model_response_object,
 )
 
-from ..types.llms.openai import *
-from .base import BaseLLM
-from .prompt_templates.factory import custom_prompt, prompt_factory
+from ...types.llms.openai import *
+from ..base import BaseLLM
+from ..prompt_templates.factory import custom_prompt, prompt_factory
 
 
 class OpenAIError(Exception):
@@ -1608,168 +1598,6 @@ class OpenAIChatCompletion(BaseLLM):
         else:
             raise OpenAIError(status_code=500, message=str(e))
 
-    # Audio Transcriptions
-    async def make_openai_audio_transcriptions_request(
-        self,
-        openai_aclient: AsyncOpenAI,
-        data: dict,
-        timeout: Union[float, httpx.Timeout],
-    ):
-        """
-        Helper to:
-        - call openai_aclient.audio.transcriptions.with_raw_response when litellm.return_response_headers is True
-        - call openai_aclient.audio.transcriptions.create by default
-        """
-        try:
-            if litellm.return_response_headers is True:
-                raw_response = (
-                    await openai_aclient.audio.transcriptions.with_raw_response.create(
-                        **data, timeout=timeout
-                    )
-                )  # type: ignore
-                headers = dict(raw_response.headers)
-                response = raw_response.parse()
-                return headers, response
-            else:
-                response = await openai_aclient.audio.transcriptions.create(**data, timeout=timeout)  # type: ignore
-                return None, response
-        except Exception as e:
-            raise e
-
-    def make_sync_openai_audio_transcriptions_request(
-        self,
-        openai_client: OpenAI,
-        data: dict,
-        timeout: Union[float, httpx.Timeout],
-    ):
-        """
-        Helper to:
-        - call openai_aclient.audio.transcriptions.with_raw_response when litellm.return_response_headers is True
-        - call openai_aclient.audio.transcriptions.create by default
-        """
-        try:
-            if litellm.return_response_headers is True:
-                raw_response = (
-                    openai_client.audio.transcriptions.with_raw_response.create(
-                        **data, timeout=timeout
-                    )
-                )  # type: ignore
-                headers = dict(raw_response.headers)
-                response = raw_response.parse()
-                return headers, response
-            else:
-                response = openai_client.audio.transcriptions.create(**data, timeout=timeout)  # type: ignore
-                return None, response
-        except Exception as e:
-            raise e
-
-    def audio_transcriptions(
-        self,
-        model: str,
-        audio_file: BinaryIO,
-        optional_params: dict,
-        model_response: TranscriptionResponse,
-        timeout: float,
-        max_retries: int,
-        api_key: Optional[str],
-        api_base: Optional[str],
-        client=None,
-        logging_obj=None,
-        atranscription: bool = False,
-    ):
-        data = {"model": model, "file": audio_file, **optional_params}
-        if atranscription is True:
-            return self.async_audio_transcriptions(
-                audio_file=audio_file,
-                data=data,
-                model_response=model_response,
-                timeout=timeout,
-                api_key=api_key,
-                api_base=api_base,
-                client=client,
-                max_retries=max_retries,
-                logging_obj=logging_obj,
-            )
-
-        openai_client = self._get_openai_client(
-            is_async=False,
-            api_key=api_key,
-            api_base=api_base,
-            timeout=timeout,
-            max_retries=max_retries,
-        )
-        _, response = self.make_sync_openai_audio_transcriptions_request(
-            openai_client=openai_client,
-            data=data,
-            timeout=timeout,
-        )
-
-        if isinstance(response, BaseModel):
-            stringified_response = response.model_dump()
-        else:
-            stringified_response = TranscriptionResponse(text=response).model_dump()
-
-        ## LOGGING
-        logging_obj.post_call(
-            input=audio_file.name,
-            api_key=api_key,
-            additional_args={"complete_input_dict": data},
-            original_response=stringified_response,
-        )
-        hidden_params = {"model": "whisper-1", "custom_llm_provider": "openai"}
-        final_response = convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, hidden_params=hidden_params, response_type="audio_transcription")  # type: ignore
-        return final_response
-
-    async def async_audio_transcriptions(
-        self,
-        audio_file: BinaryIO,
-        data: dict,
-        model_response: TranscriptionResponse,
-        timeout: float,
-        logging_obj: LiteLLMLoggingObj,
-        api_key: Optional[str] = None,
-        api_base: Optional[str] = None,
-        client=None,
-        max_retries=None,
-    ):
-        try:
-            openai_aclient = self._get_openai_client(
-                is_async=True,
-                api_key=api_key,
-                api_base=api_base,
-                timeout=timeout,
-                max_retries=max_retries,
-                client=client,
-            )
-
-            headers, response = await self.make_openai_audio_transcriptions_request(
-                openai_aclient=openai_aclient,
-                data=data,
-                timeout=timeout,
-            )
-            logging_obj.model_call_details["response_headers"] = headers
-            if isinstance(response, BaseModel):
-                stringified_response = response.model_dump()
-            else:
-                stringified_response = TranscriptionResponse(text=response).model_dump()
-            ## LOGGING
-            logging_obj.post_call(
-                input=audio_file.name,
-                api_key=api_key,
-                additional_args={"complete_input_dict": data},
-                original_response=stringified_response,
-            )
-            hidden_params = {"model": "whisper-1", "custom_llm_provider": "openai"}
-            return convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, hidden_params=hidden_params, response_type="audio_transcription")  # type: ignore
-        except Exception as e:
-            ## LOGGING
-            logging_obj.post_call(
-                input=input,
-                api_key=api_key,
-                original_response=str(e),
-            )
-            raise e
-
     def audio_speech(
         self,
         model: str,
diff --git a/litellm/llms/azure_text.py b/litellm/llms/azure_text.py
index fb6e4875e..9a8d462e5 100644
--- a/litellm/llms/azure_text.py
+++ b/litellm/llms/azure_text.py
@@ -1,7 +1,7 @@
 import json
 import types  # type: ignore
 import uuid
-from typing import Any, BinaryIO, Callable, Optional, Union
+from typing import Any, Callable, Optional, Union
 
 import httpx
 import requests
@@ -19,8 +19,8 @@ from litellm.utils import (
     convert_to_model_response_object,
 )
 
-from ..llms.openai import OpenAITextCompletion, OpenAITextCompletionConfig
 from .base import BaseLLM
+from .OpenAI.openai import OpenAITextCompletion, OpenAITextCompletionConfig
 from .prompt_templates.factory import custom_prompt, prompt_factory
 
 openai_text_completion_config = OpenAITextCompletionConfig()
diff --git a/litellm/main.py b/litellm/main.py
index 9e7297e11..bb2c1c47f 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -22,18 +22,7 @@ import uuid
 from concurrent.futures import ThreadPoolExecutor
 from copy import deepcopy
 from functools import partial
-from typing import (
-    Any,
-    BinaryIO,
-    Callable,
-    Dict,
-    List,
-    Literal,
-    Mapping,
-    Optional,
-    Type,
-    Union,
-)
+from typing import Any, Callable, Dict, List, Literal, Mapping, Optional, Type, Union
 
 import dotenv
 import httpx
@@ -93,8 +82,9 @@ from .llms import (
 from .llms.AI21 import completion as ai21
 from .llms.anthropic.chat import AnthropicChatCompletion
 from .llms.anthropic.completion import AnthropicTextCompletion
-from .llms.azure import AzureChatCompletion, _check_dynamic_azure_params
 from .llms.azure_text import AzureTextCompletion
+from .llms.AzureOpenAI.audio_transcriptions import AzureAudioTranscription
+from .llms.AzureOpenAI.azure import AzureChatCompletion, _check_dynamic_azure_params
 from .llms.bedrock import image_generation as bedrock_image_generation  # type: ignore
 from .llms.bedrock.chat import BedrockConverseLLM, BedrockLLM
 from .llms.bedrock.embed.embedding import BedrockEmbedding
@@ -104,7 +94,8 @@ from .llms.cohere import embed as cohere_embed
 from .llms.custom_llm import CustomLLM, custom_chat_llm_router
 from .llms.databricks import DatabricksChatCompletion
 from .llms.huggingface_restapi import Huggingface
-from .llms.openai import OpenAIChatCompletion, OpenAITextCompletion
+from .llms.OpenAI.audio_transcriptions import OpenAIAudioTranscription
+from .llms.OpenAI.openai import OpenAIChatCompletion, OpenAITextCompletion
 from .llms.predibase import PredibaseChatCompletion
 from .llms.prompt_templates.factory import (
     custom_prompt,
@@ -146,6 +137,7 @@ from .types.llms.openai import HttpxBinaryResponseContent
 from .types.utils import (
     AdapterCompletionStreamWrapper,
     ChatCompletionMessageToolCall,
+    FileTypes,
     HiddenParams,
     all_litellm_params,
 )
@@ -169,11 +161,13 @@ from litellm.utils import (
 ####### ENVIRONMENT VARIABLES ###################
 openai_chat_completions = OpenAIChatCompletion()
 openai_text_completions = OpenAITextCompletion()
+openai_audio_transcriptions = OpenAIAudioTranscription()
 databricks_chat_completions = DatabricksChatCompletion()
 anthropic_chat_completions = AnthropicChatCompletion()
 anthropic_text_completions = AnthropicTextCompletion()
 azure_chat_completions = AzureChatCompletion()
 azure_text_completions = AzureTextCompletion()
+azure_audio_transcriptions = AzureAudioTranscription()
 huggingface = Huggingface()
 predibase_chat_completions = PredibaseChatCompletion()
 codestral_text_completions = CodestralTextCompletion()
@@ -4614,7 +4608,7 @@ async def atranscription(*args, **kwargs) -> TranscriptionResponse:
 @client
 def transcription(
     model: str,
-    file: BinaryIO,
+    file: FileTypes,
     ## OPTIONAL OPENAI PARAMS ##
     language: Optional[str] = None,
     prompt: Optional[str] = None,
@@ -4704,7 +4698,7 @@ def transcription(
             or get_secret("AZURE_API_KEY")
         )  # type: ignore
 
-        response = azure_chat_completions.audio_transcriptions(
+        response = azure_audio_transcriptions.audio_transcriptions(
             model=model,
             audio_file=file,
             optional_params=optional_params,
@@ -4738,7 +4732,7 @@ def transcription(
             or litellm.openai_key
             or get_secret("OPENAI_API_KEY")
         )  # type: ignore
-        response = openai_chat_completions.audio_transcriptions(
+        response = openai_audio_transcriptions.audio_transcriptions(
             model=model,
             audio_file=file,
             optional_params=optional_params,
diff --git a/litellm/router.py b/litellm/router.py
index 2743a36b9..233331e80 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -24,18 +24,7 @@ import traceback
 import uuid
 from collections import defaultdict
 from datetime import datetime
-from typing import (
-    Any,
-    BinaryIO,
-    Dict,
-    Iterable,
-    List,
-    Literal,
-    Optional,
-    Tuple,
-    TypedDict,
-    Union,
-)
+from typing import Any, Dict, Iterable, List, Literal, Optional, Tuple, TypedDict, Union
 
 import httpx
 import openai
@@ -48,7 +37,7 @@ from litellm.assistants.main import AssistantDeleted
 from litellm.caching import DualCache, InMemoryCache, RedisCache
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLogging
-from litellm.llms.azure import get_azure_ad_token_from_oidc
+from litellm.llms.AzureOpenAI.azure import get_azure_ad_token_from_oidc
 from litellm.router_strategy.least_busy import LeastBusyLoggingHandler
 from litellm.router_strategy.lowest_cost import LowestCostLoggingHandler
 from litellm.router_strategy.lowest_latency import LowestLatencyLoggingHandler
@@ -1342,7 +1331,7 @@ class Router:
             self.fail_calls[model_name] += 1
             raise e
 
-    async def atranscription(self, file: BinaryIO, model: str, **kwargs):
+    async def atranscription(self, file: FileTypes, model: str, **kwargs):
         """
         Example Usage:
 
@@ -1386,7 +1375,7 @@
             )
             raise e
 
-    async def _atranscription(self, file: BinaryIO, model: str, **kwargs):
+    async def _atranscription(self, file: FileTypes, model: str, **kwargs):
         try:
             verbose_router_logger.debug(
                 f"Inside _atranscription()- model: {model}; kwargs: {kwargs}"
diff --git a/litellm/types/llms/openai.py b/litellm/types/llms/openai.py
index 788199c00..9d65fe87e 100644
--- a/litellm/types/llms/openai.py
+++ b/litellm/types/llms/openai.py
@@ -1,16 +1,5 @@
 from os import PathLike
-from typing import (
-    IO,
-    Any,
-    BinaryIO,
-    Iterable,
-    List,
-    Literal,
-    Mapping,
-    Optional,
-    Tuple,
-    Union,
-)
+from typing import IO, Any, Iterable, List, Literal, Mapping, Optional, Tuple, Union
 
 from openai._legacy_response import HttpxBinaryResponseContent
 from openai.lib.streaming._assistants import (
diff --git a/litellm/types/utils.py b/litellm/types/utils.py
index 72cc98b3e..696fb5b83 100644
--- a/litellm/types/utils.py
+++ b/litellm/types/utils.py
@@ -5,6 +5,7 @@ from enum import Enum
 from typing import Any, Dict, List, Literal, Optional, Tuple, Union
 
 from openai._models import BaseModel as OpenAIObject
+from openai.types.audio.transcription_create_params import FileTypes
 from openai.types.completion_usage import CompletionUsage
 from pydantic import ConfigDict, Field, PrivateAttr
 from typing_extensions import Callable, Dict, Required, TypedDict, override
diff --git a/litellm/utils.py b/litellm/utils.py
index 33d3a59a3..7587563d5 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -58,7 +58,7 @@ import litellm.litellm_core_utils
 import litellm.litellm_core_utils.json_validation_rule
 from litellm.caching import DualCache
 from litellm.litellm_core_utils.core_helpers import (
-    get_file_check_sum,
+    get_audio_file_name,
     map_finish_reason,
 )
 from litellm.litellm_core_utils.exception_mapping_utils import get_error_message
@@ -86,6 +86,7 @@ from litellm.types.utils import (
     Delta,
     Embedding,
     EmbeddingResponse,
+    FileTypes,
     ImageResponse,
     Message,
     ModelInfo,
@@ -161,7 +162,6 @@ except Exception as e:
 from concurrent.futures import ThreadPoolExecutor
 from typing import (
     Any,
-    BinaryIO,
     Callable,
     Dict,
     Iterable,
@@ -566,14 +566,13 @@ def function_setup(
             call_type == CallTypes.atranscription.value
             or call_type == CallTypes.transcription.value
         ):
-            _file_name: BinaryIO = args[1] if len(args) > 1 else kwargs["file"]
-            file_checksum = get_file_check_sum(_file=_file_name)
-            file_name = _file_name.name
+            _file_obj: FileTypes = args[1] if len(args) > 1 else kwargs["file"]
+            file_checksum = get_audio_file_name(file_obj=_file_obj)
             if "metadata" in kwargs:
                 kwargs["metadata"]["file_checksum"] = file_checksum
             else:
                 kwargs["metadata"] = {"file_checksum": file_checksum}
-            messages = file_name
+            messages = _file_obj
         elif (
             call_type == CallTypes.aspeech.value
             or call_type == CallTypes.speech.value
         ):
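
Usage sketch (illustrative only, not part of the patch; the file name, model, and API key below are assumptions): the refactor moves audio transcription into the dedicated OpenAIAudioTranscription / AzureAudioTranscription handlers and widens the `file` parameter from BinaryIO to OpenAI's FileTypes, with logging input and the "file_checksum" cache metadata now derived from get_audio_file_name() instead of an fstat-based checksum.

# Sketch of the refactored call path. Assumes a valid OPENAI_API_KEY and a
# local "sample.wav"; both are illustrative.
from pathlib import Path

import litellm
from litellm.litellm_core_utils.core_helpers import get_audio_file_name

# The helper used for logging input and the "file_checksum" metadata key:
print(get_audio_file_name(Path("sample.wav")))  # "sample.wav" (uses .name)
print(get_audio_file_name(b"raw audio bytes"))  # str() fallback for bytes

# `file` is typed as FileTypes after this change; an open handle still works
# and is routed through openai_audio_transcriptions.audio_transcriptions().
with open("sample.wav", "rb") as audio:
    resp = litellm.transcription(model="whisper-1", file=audio)
    print(resp.text)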