diff --git a/litellm/__init__.py b/litellm/__init__.py
index 060e14b78..25cae8328 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -924,7 +924,7 @@ from .llms.bedrock.embed.amazon_titan_v2_transformation import (
     AmazonTitanV2Config,
 )
 from .llms.bedrock.embed.cohere_transformation import BedrockCohereEmbeddingConfig
-from .llms.openai import (
+from .llms.OpenAI.openai import (
     OpenAIConfig,
     OpenAITextCompletionConfig,
     MistralConfig,
@@ -939,7 +939,7 @@ from .llms.AI21.chat import AI21ChatConfig
 from .llms.fireworks_ai import FireworksAIConfig
 from .llms.volcengine import VolcEngineConfig
 from .llms.text_completion_codestral import MistralTextCompletionConfig
-from .llms.azure import (
+from .llms.AzureOpenAI.azure import (
     AzureOpenAIConfig,
     AzureOpenAIError,
     AzureOpenAIAssistantsAPIConfig,
diff --git a/litellm/assistants/main.py b/litellm/assistants/main.py
index ba169f5e2..0ea5860ae 100644
--- a/litellm/assistants/main.py
+++ b/litellm/assistants/main.py
@@ -21,8 +21,8 @@ from litellm.utils import (
     supports_httpx_timeout,
 )

-from ..llms.azure import AzureAssistantsAPI
-from ..llms.openai import OpenAIAssistantsAPI
+from ..llms.AzureOpenAI.azure import AzureAssistantsAPI
+from ..llms.OpenAI.openai import OpenAIAssistantsAPI
 from ..types.llms.openai import *
 from ..types.router import *
 from .utils import get_optional_params_add_message
@@ -184,6 +184,21 @@ def get_assistants(
                 request=httpx.Request(method="create_thread", url="https://github.com/BerriAI/litellm"),  # type: ignore
             ),
         )
+
+    if response is None:
+        raise litellm.exceptions.BadRequestError(
+            message="LiteLLM doesn't support {} for 'get_assistants'. Only 'openai' is supported.".format(
+                custom_llm_provider
+            ),
+            model="n/a",
+            llm_provider=custom_llm_provider,
+            response=httpx.Response(
+                status_code=400,
+                content="Unsupported provider",
+                request=httpx.Request(method="create_thread", url="https://github.com/BerriAI/litellm"),  # type: ignore
+            ),
+        )
+
     return response
diff --git a/litellm/batches/main.py b/litellm/batches/main.py
index e927a18b6..cd81cc44f 100644
--- a/litellm/batches/main.py
+++ b/litellm/batches/main.py
@@ -20,8 +20,8 @@ import httpx

 import litellm
 from litellm import client
-from litellm.llms.azure import AzureBatchesAPI
-from litellm.llms.openai import OpenAIBatchesAPI
+from litellm.llms.AzureOpenAI.azure import AzureBatchesAPI
+from litellm.llms.OpenAI.openai import OpenAIBatchesAPI
 from litellm.secret_managers.main import get_secret
 from litellm.types.llms.openai import (
     Batch,
diff --git a/litellm/caching.py b/litellm/caching.py
index 13da3cb1e..7f67ee455 100644
--- a/litellm/caching.py
+++ b/litellm/caching.py
@@ -17,7 +17,7 @@ import time
 import traceback
 from datetime import timedelta
 from enum import Enum
-from typing import Any, BinaryIO, List, Literal, Optional, Union
+from typing import Any, List, Literal, Optional, Union

 from openai._models import BaseModel as OpenAIObject

diff --git a/litellm/files/main.py b/litellm/files/main.py
index 1ed1c1e61..84fb50652 100644
--- a/litellm/files/main.py
+++ b/litellm/files/main.py
@@ -16,7 +16,7 @@ import httpx

 import litellm
 from litellm import client, get_secret
 from litellm.llms.files_apis.azure import AzureOpenAIFilesAPI
-from litellm.llms.openai import FileDeleted, FileObject, OpenAIFilesAPI
+from litellm.llms.OpenAI.openai import FileDeleted, FileObject, OpenAIFilesAPI
 from litellm.types.llms.openai import (
     Batch,
     CreateFileRequest,
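With the new `response is None` guard in `litellm/assistants/main.py` above, an unsupported provider now raises instead of silently returning `None`. A minimal sketch of the behavior change (the provider string is a deliberately unsupported placeholder, and the exact set of required kwargs may differ):

```python
import litellm

try:
    # Hypothetical call: any provider other than "openai"/"azure" hits the new guard.
    litellm.get_assistants(custom_llm_provider="my_unsupported_provider")
except litellm.exceptions.BadRequestError as e:
    print(e.llm_provider)  # "my_unsupported_provider"
```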
diff --git a/litellm/litellm_core_utils/audio_utils/utils.py b/litellm/litellm_core_utils/audio_utils/utils.py
new file mode 100644
index 000000000..ab19dac9c
--- /dev/null
+++ b/litellm/litellm_core_utils/audio_utils/utils.py
@@ -0,0 +1,23 @@
+"""
+Utils used for litellm.transcription() and litellm.atranscription()
+"""
+
+from litellm.types.utils import FileTypes
+
+
+def get_audio_file_name(file_obj: FileTypes) -> str:
+    """
+    Safely get the name of a file-like object or return its string representation.
+
+    Args:
+        file_obj (FileTypes): A file-like object or any other object.
+
+    Returns:
+        str: The name of the file if available, otherwise a string representation of the object.
+    """
+    if hasattr(file_obj, "name"):
+        return getattr(file_obj, "name")
+    elif hasattr(file_obj, "__str__"):
+        return str(file_obj)
+    else:
+        return repr(file_obj)
diff --git a/litellm/litellm_core_utils/core_helpers.py b/litellm/litellm_core_utils/core_helpers.py
index a9e535316..269844ce8 100644
--- a/litellm/litellm_core_utils/core_helpers.py
+++ b/litellm/litellm_core_utils/core_helpers.py
@@ -1,7 +1,7 @@
 # What is this?
 ## Helper utilities
 import os
-from typing import BinaryIO, List, Literal, Optional, Tuple
+from typing import List, Literal, Optional, Tuple

 from litellm._logging import verbose_logger

@@ -86,20 +86,3 @@ def _get_parent_otel_span_from_kwargs(kwargs: Optional[dict] = None):
             return kwargs["litellm_parent_otel_span"]
     except:
         return None
-
-
-def get_file_check_sum(_file: BinaryIO):
-    """
-    Helper to safely get file checksum - used as a cache key
-    """
-    try:
-        file_descriptor = _file.fileno()
-        file_stat = os.fstat(file_descriptor)
-        file_size = str(file_stat.st_size)
-        file_checksum = _file.name + file_size
-        return file_checksum
-    except Exception as e:
-        verbose_logger.error(f"Error getting file_checksum: {(str(e))}")
-        file_checksum = _file.name
-        return file_checksum
-    return file_checksum
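A quick sketch of the new helper that replaces `get_file_check_sum` (file names are illustrative): it prefers a `.name` attribute and otherwise falls back to `str()`, which also covers the plain path strings and bytes now allowed by `FileTypes`.

```python
from litellm.litellm_core_utils.audio_utils.utils import get_audio_file_name

with open("speech.wav", "rb") as f:        # hypothetical file
    print(get_audio_file_name(f))          # -> "speech.wav" (from f.name)

print(get_audio_file_name("speech.wav"))   # plain str path -> "speech.wav"
print(get_audio_file_name(b"raw audio"))   # no .name -> falls back to str() of the object
```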
diff --git a/litellm/llms/AzureOpenAI/audio_transcriptions.py b/litellm/llms/AzureOpenAI/audio_transcriptions.py
new file mode 100644
index 000000000..cecdfdc21
--- /dev/null
+++ b/litellm/llms/AzureOpenAI/audio_transcriptions.py
@@ -0,0 +1,192 @@
+import uuid
+from typing import Optional, Union
+
+import httpx
+from openai import AsyncAzureOpenAI, AzureOpenAI
+from pydantic import BaseModel
+
+import litellm
+from litellm.litellm_core_utils.audio_utils.utils import get_audio_file_name
+from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
+from litellm.types.utils import FileTypes
+from litellm.utils import TranscriptionResponse, convert_to_model_response_object
+
+from .azure import (
+    AzureChatCompletion,
+    get_azure_ad_token_from_oidc,
+    select_azure_base_url_or_endpoint,
+)
+
+
+class AzureAudioTranscription(AzureChatCompletion):
+    def audio_transcriptions(
+        self,
+        model: str,
+        audio_file: FileTypes,
+        optional_params: dict,
+        model_response: TranscriptionResponse,
+        timeout: float,
+        max_retries: int,
+        api_key: Optional[str] = None,
+        api_base: Optional[str] = None,
+        api_version: Optional[str] = None,
+        client=None,
+        azure_ad_token: Optional[str] = None,
+        logging_obj=None,
+        atranscription: bool = False,
+    ):
+        data = {"model": model, "file": audio_file, **optional_params}
+
+        # init AzureOpenAI Client
+        azure_client_params = {
+            "api_version": api_version,
+            "azure_endpoint": api_base,
+            "azure_deployment": model,
+            "timeout": timeout,
+        }
+
+        azure_client_params = select_azure_base_url_or_endpoint(
+            azure_client_params=azure_client_params
+        )
+        if api_key is not None:
+            azure_client_params["api_key"] = api_key
+        elif azure_ad_token is not None:
+            if azure_ad_token.startswith("oidc/"):
+                azure_ad_token = get_azure_ad_token_from_oidc(azure_ad_token)
+            azure_client_params["azure_ad_token"] = azure_ad_token
+
+        if max_retries is not None:
+            azure_client_params["max_retries"] = max_retries
+
+        if atranscription is True:
+            return self.async_audio_transcriptions(
+                audio_file=audio_file,
+                data=data,
+                model_response=model_response,
+                timeout=timeout,
+                api_key=api_key,
+                api_base=api_base,
+                client=client,
+                azure_client_params=azure_client_params,
+                max_retries=max_retries,
+                logging_obj=logging_obj,
+            )
+        if client is None:
+            azure_client = AzureOpenAI(http_client=litellm.client_session, **azure_client_params)  # type: ignore
+        else:
+            azure_client = client
+
+        ## LOGGING
+        logging_obj.pre_call(
+            input=f"audio_file_{uuid.uuid4()}",
+            api_key=azure_client.api_key,
+            additional_args={
+                "headers": {"Authorization": f"Bearer {azure_client.api_key}"},
+                "api_base": azure_client._base_url._uri_reference,
+                "atranscription": True,
+                "complete_input_dict": data,
+            },
+        )
+
+        response = azure_client.audio.transcriptions.create(
+            **data, timeout=timeout  # type: ignore
+        )
+
+        if isinstance(response, BaseModel):
+            stringified_response = response.model_dump()
+        else:
+            stringified_response = TranscriptionResponse(text=response).model_dump()
+
+        ## LOGGING
+        logging_obj.post_call(
+            input=get_audio_file_name(audio_file),
+            api_key=api_key,
+            additional_args={"complete_input_dict": data},
+            original_response=stringified_response,
+        )
+        hidden_params = {"model": "whisper-1", "custom_llm_provider": "azure"}
+        final_response = convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, hidden_params=hidden_params, response_type="audio_transcription")  # type: ignore
+        return final_response
+
+    async def async_audio_transcriptions(
+        self,
+        audio_file: FileTypes,
+        data: dict,
+        model_response: TranscriptionResponse,
+        timeout: float,
+        api_key: Optional[str] = None,
+        api_base: Optional[str] = None,
+        client=None,
+        azure_client_params=None,
+        max_retries=None,
+        logging_obj=None,
+    ):
+        response = None
+        try:
+            if client is None:
+                async_azure_client = AsyncAzureOpenAI(
+                    **azure_client_params,
+                    http_client=litellm.aclient_session,
+                )
+            else:
+                async_azure_client = client
+
+            ## LOGGING
+            logging_obj.pre_call(
+                input=f"audio_file_{uuid.uuid4()}",
+                api_key=async_azure_client.api_key,
+                additional_args={
+                    "headers": {
+                        "Authorization": f"Bearer {async_azure_client.api_key}"
+                    },
+                    "api_base": async_azure_client._base_url._uri_reference,
+                    "atranscription": True,
+                    "complete_input_dict": data,
+                },
+            )
+
+            raw_response = (
+                await async_azure_client.audio.transcriptions.with_raw_response.create(
+                    **data, timeout=timeout
+                )
+            )  # type: ignore
+
+            headers = dict(raw_response.headers)
+            response = raw_response.parse()
+
+            if isinstance(response, BaseModel):
+                stringified_response = response.model_dump()
+            else:
+                stringified_response = TranscriptionResponse(text=response).model_dump()
+
+            ## LOGGING
+            logging_obj.post_call(
+                input=get_audio_file_name(audio_file),
+                api_key=api_key,
+                additional_args={
+                    "headers": {
+                        "Authorization": f"Bearer {async_azure_client.api_key}"
+                    },
+                    "api_base": async_azure_client._base_url._uri_reference,
+                    "atranscription": True,
+                    "complete_input_dict": data,
+                },
+                original_response=stringified_response,
+            )
+            hidden_params = {"model": "whisper-1", "custom_llm_provider": "azure"}
+            response = convert_to_model_response_object(
+                _response_headers=headers,
+                response_object=stringified_response,
+                model_response_object=model_response,
+                hidden_params=hidden_params,
+                response_type="audio_transcription",
+            )  # type: ignore
+            return response
+        except Exception as e:
+            ## LOGGING
+            logging_obj.post_call(
+                input=input,
+                api_key=api_key,
+                original_response=str(e),
+            )
+            raise e
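To exercise the new `AzureAudioTranscription` path end to end, a call through `litellm.transcription()` with an `azure/` model should route here. A minimal sketch (the deployment name, credentials, and file are placeholders):

```python
import os
import litellm

# Placeholders: point these at a real Azure OpenAI deployment before running.
os.environ["AZURE_API_KEY"] = "my-azure-key"
os.environ["AZURE_API_BASE"] = "https://my-endpoint.openai.azure.com"
os.environ["AZURE_API_VERSION"] = "2024-02-15-preview"

with open("speech.wav", "rb") as audio_file:
    response = litellm.transcription(
        model="azure/whisper",  # hypothetical deployment name
        file=audio_file,
    )
print(response.text)
```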
"api_base": azure_client._base_url._uri_reference, - "atranscription": True, - "complete_input_dict": data, - }, - ) - - response = azure_client.audio.transcriptions.create( - **data, timeout=timeout # type: ignore - ) - - if isinstance(response, BaseModel): - stringified_response = response.model_dump() - else: - stringified_response = TranscriptionResponse(text=response).model_dump() - - ## LOGGING - logging_obj.post_call( - input=audio_file.name, - api_key=api_key, - additional_args={"complete_input_dict": data}, - original_response=stringified_response, - ) - hidden_params = {"model": "whisper-1", "custom_llm_provider": "azure"} - final_response = convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, hidden_params=hidden_params, response_type="audio_transcription") # type: ignore - return final_response - - async def async_audio_transcriptions( - self, - audio_file: BinaryIO, - data: dict, - model_response: TranscriptionResponse, - timeout: float, - api_key: Optional[str] = None, - api_base: Optional[str] = None, - client=None, - azure_client_params=None, - max_retries=None, - logging_obj=None, - ): - response = None - try: - if client is None: - async_azure_client = AsyncAzureOpenAI( - **azure_client_params, - http_client=litellm.aclient_session, - ) - else: - async_azure_client = client - - ## LOGGING - logging_obj.pre_call( - input=f"audio_file_{uuid.uuid4()}", - api_key=async_azure_client.api_key, - additional_args={ - "headers": { - "Authorization": f"Bearer {async_azure_client.api_key}" - }, - "api_base": async_azure_client._base_url._uri_reference, - "atranscription": True, - "complete_input_dict": data, - }, - ) - - raw_response = ( - await async_azure_client.audio.transcriptions.with_raw_response.create( - **data, timeout=timeout - ) - ) # type: ignore - - headers = dict(raw_response.headers) - response = raw_response.parse() - - if isinstance(response, BaseModel): - stringified_response = response.model_dump() - else: - stringified_response = TranscriptionResponse(text=response).model_dump() - - ## LOGGING - logging_obj.post_call( - input=audio_file.name, - api_key=api_key, - additional_args={ - "headers": { - "Authorization": f"Bearer {async_azure_client.api_key}" - }, - "api_base": async_azure_client._base_url._uri_reference, - "atranscription": True, - "complete_input_dict": data, - }, - original_response=stringified_response, - ) - hidden_params = {"model": "whisper-1", "custom_llm_provider": "azure"} - response = convert_to_model_response_object( - _response_headers=headers, - response_object=stringified_response, - model_response_object=model_response, - hidden_params=hidden_params, - response_type="audio_transcription", - ) # type: ignore - return response - except Exception as e: - ## LOGGING - logging_obj.post_call( - input=input, - api_key=api_key, - original_response=str(e), - ) - raise e - def audio_speech( self, model: str, @@ -2473,7 +2292,7 @@ class AzureAssistantsAPI(BaseLLM): """ Here's an example: ``` - from litellm.llms.openai import OpenAIAssistantsAPI, MessageData + from litellm.llms.OpenAI.openai import OpenAIAssistantsAPI, MessageData # create thread message: MessageData = {"role": "user", "content": "Hey, how's it going?"} diff --git a/litellm/llms/OpenAI/audio_transcriptions.py b/litellm/llms/OpenAI/audio_transcriptions.py new file mode 100644 index 000000000..cfa0b0b1a --- /dev/null +++ b/litellm/llms/OpenAI/audio_transcriptions.py @@ -0,0 +1,177 @@ +from typing import Optional, Union + +import 
diff --git a/litellm/llms/OpenAI/audio_transcriptions.py b/litellm/llms/OpenAI/audio_transcriptions.py
new file mode 100644
index 000000000..cfa0b0b1a
--- /dev/null
+++ b/litellm/llms/OpenAI/audio_transcriptions.py
@@ -0,0 +1,177 @@
+from typing import Optional, Union
+
+import httpx
+from openai import AsyncOpenAI, OpenAI
+from pydantic import BaseModel
+
+import litellm
+from litellm.litellm_core_utils.audio_utils.utils import get_audio_file_name
+from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
+from litellm.types.utils import FileTypes
+from litellm.utils import TranscriptionResponse, convert_to_model_response_object
+
+from .openai import OpenAIChatCompletion
+
+
+class OpenAIAudioTranscription(OpenAIChatCompletion):
+    # Audio Transcriptions
+    async def make_openai_audio_transcriptions_request(
+        self,
+        openai_aclient: AsyncOpenAI,
+        data: dict,
+        timeout: Union[float, httpx.Timeout],
+    ):
+        """
+        Helper to:
+        - call openai_aclient.audio.transcriptions.with_raw_response when litellm.return_response_headers is True
+        - call openai_aclient.audio.transcriptions.create by default
+        """
+        try:
+            if litellm.return_response_headers is True:
+                raw_response = (
+                    await openai_aclient.audio.transcriptions.with_raw_response.create(
+                        **data, timeout=timeout
+                    )
+                )  # type: ignore
+                headers = dict(raw_response.headers)
+                response = raw_response.parse()
+                return headers, response
+            else:
+                response = await openai_aclient.audio.transcriptions.create(**data, timeout=timeout)  # type: ignore
+                return None, response
+        except Exception as e:
+            raise e
+
+    def make_sync_openai_audio_transcriptions_request(
+        self,
+        openai_client: OpenAI,
+        data: dict,
+        timeout: Union[float, httpx.Timeout],
+    ):
+        """
+        Helper to:
+        - call openai_client.audio.transcriptions.with_raw_response when litellm.return_response_headers is True
+        - call openai_client.audio.transcriptions.create by default
+        """
+        try:
+            if litellm.return_response_headers is True:
+                raw_response = (
+                    openai_client.audio.transcriptions.with_raw_response.create(
+                        **data, timeout=timeout
+                    )
+                )  # type: ignore
+                headers = dict(raw_response.headers)
+                response = raw_response.parse()
+                return headers, response
+            else:
+                response = openai_client.audio.transcriptions.create(**data, timeout=timeout)  # type: ignore
+                return None, response
+        except Exception as e:
+            raise e
+
+    def audio_transcriptions(
+        self,
+        model: str,
+        audio_file: FileTypes,
+        optional_params: dict,
+        model_response: TranscriptionResponse,
+        timeout: float,
+        max_retries: int,
+        api_key: Optional[str],
+        api_base: Optional[str],
+        client=None,
+        logging_obj=None,
+        atranscription: bool = False,
+    ):
+        data = {"model": model, "file": audio_file, **optional_params}
+        if atranscription is True:
+            return self.async_audio_transcriptions(
+                audio_file=audio_file,
+                data=data,
+                model_response=model_response,
+                timeout=timeout,
+                api_key=api_key,
+                api_base=api_base,
+                client=client,
+                max_retries=max_retries,
+                logging_obj=logging_obj,
+            )
+
+        openai_client = self._get_openai_client(
+            is_async=False,
+            api_key=api_key,
+            api_base=api_base,
+            timeout=timeout,
+            max_retries=max_retries,
+        )
+        _, response = self.make_sync_openai_audio_transcriptions_request(
+            openai_client=openai_client,
+            data=data,
+            timeout=timeout,
+        )
+
+        if isinstance(response, BaseModel):
+            stringified_response = response.model_dump()
+        else:
+            stringified_response = TranscriptionResponse(text=response).model_dump()
+
+        ## LOGGING
+        logging_obj.post_call(
+            input=get_audio_file_name(audio_file),
+            api_key=api_key,
+            additional_args={"complete_input_dict": data},
+            original_response=stringified_response,
+        )
+        hidden_params = {"model": "whisper-1", "custom_llm_provider": "openai"}
+        final_response = convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, hidden_params=hidden_params, response_type="audio_transcription")  # type: ignore
+        return final_response
+
+    async def async_audio_transcriptions(
+        self,
+        audio_file: FileTypes,
+        data: dict,
+        model_response: TranscriptionResponse,
+        timeout: float,
+        logging_obj: LiteLLMLoggingObj,
+        api_key: Optional[str] = None,
+        api_base: Optional[str] = None,
+        client=None,
+        max_retries=None,
+    ):
+        try:
+            openai_aclient = self._get_openai_client(
+                is_async=True,
+                api_key=api_key,
+                api_base=api_base,
+                timeout=timeout,
+                max_retries=max_retries,
+                client=client,
+            )
+
+            headers, response = await self.make_openai_audio_transcriptions_request(
+                openai_aclient=openai_aclient,
+                data=data,
+                timeout=timeout,
+            )
+            logging_obj.model_call_details["response_headers"] = headers
+            if isinstance(response, BaseModel):
+                stringified_response = response.model_dump()
+            else:
+                stringified_response = TranscriptionResponse(text=response).model_dump()
+            ## LOGGING
+            logging_obj.post_call(
+                input=get_audio_file_name(audio_file),
+                api_key=api_key,
+                additional_args={"complete_input_dict": data},
+                original_response=stringified_response,
+            )
+            hidden_params = {"model": "whisper-1", "custom_llm_provider": "openai"}
+            return convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, hidden_params=hidden_params, response_type="audio_transcription")  # type: ignore
+        except Exception as e:
+            ## LOGGING
+            logging_obj.post_call(
+                input=input,
+                api_key=api_key,
+                original_response=str(e),
+            )
+            raise e
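The `with_raw_response` branch above only fires when `litellm.return_response_headers` is enabled. A small sketch of the helper's contract, calling it directly (the file and the header name are illustrative; the helper's signature is taken from the diff above):

```python
import asyncio
from openai import AsyncOpenAI

import litellm
from litellm.llms.OpenAI.audio_transcriptions import OpenAIAudioTranscription

async def main():
    litellm.return_response_headers = True  # opt in to raw-response header capture
    transcriber = OpenAIAudioTranscription()
    client = AsyncOpenAI()  # reads OPENAI_API_KEY from the environment
    with open("speech.wav", "rb") as f:  # hypothetical file
        headers, response = await transcriber.make_openai_audio_transcriptions_request(
            openai_aclient=client,
            data={"model": "whisper-1", "file": f},
            timeout=600.0,
        )
    print(headers.get("x-ratelimit-remaining-requests"))  # raw HTTP headers
    print(response.text)

asyncio.run(main())
```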
diff --git a/litellm/llms/openai.py b/litellm/llms/OpenAI/openai.py
similarity index 94%
rename from litellm/llms/openai.py
rename to litellm/llms/OpenAI/openai.py
index e7a10c5cd..8021ccd59 100644
--- a/litellm/llms/openai.py
+++ b/litellm/llms/OpenAI/openai.py
@@ -4,16 +4,7 @@ import os
 import time
 import traceback
 import types
-from typing import (
-    Any,
-    BinaryIO,
-    Callable,
-    Coroutine,
-    Iterable,
-    Literal,
-    Optional,
-    Union,
-)
+from typing import Any, Callable, Coroutine, Iterable, Literal, Optional, Union

 import httpx
 import openai
@@ -33,14 +24,13 @@ from litellm.utils import (
     Message,
     ModelResponse,
     TextCompletionResponse,
-    TranscriptionResponse,
     Usage,
     convert_to_model_response_object,
 )

-from ..types.llms.openai import *
-from .base import BaseLLM
-from .prompt_templates.factory import custom_prompt, prompt_factory
+from ...types.llms.openai import *
+from ..base import BaseLLM
+from ..prompt_templates.factory import custom_prompt, prompt_factory


 class OpenAIError(Exception):
@@ -1608,168 +1598,6 @@ class OpenAIChatCompletion(BaseLLM):
         else:
             raise OpenAIError(status_code=500, message=str(e))

-    # Audio Transcriptions
-    async def make_openai_audio_transcriptions_request(
-        self,
-        openai_aclient: AsyncOpenAI,
-        data: dict,
-        timeout: Union[float, httpx.Timeout],
-    ):
-        """
-        Helper to:
-        - call openai_aclient.audio.transcriptions.with_raw_response when litellm.return_response_headers is True
-        - call openai_aclient.audio.transcriptions.create by default
-        """
-        try:
-            if litellm.return_response_headers is True:
-                raw_response = (
-                    await openai_aclient.audio.transcriptions.with_raw_response.create(
-                        **data, timeout=timeout
-                    )
-                )  # type: ignore
-                headers = dict(raw_response.headers)
-                response = raw_response.parse()
-                return headers, response
-            else:
-                response = await openai_aclient.audio.transcriptions.create(**data, timeout=timeout)  # type: ignore
-                return None, response
-        except Exception as e:
-            raise e
-
-    def make_sync_openai_audio_transcriptions_request(
-        self,
-        openai_client: OpenAI,
-        data: dict,
-        timeout: Union[float, httpx.Timeout],
-    ):
-        """
-        Helper to:
-        - call openai_aclient.audio.transcriptions.with_raw_response when litellm.return_response_headers is True
-        - call openai_aclient.audio.transcriptions.create by default
-        """
-        try:
-            if litellm.return_response_headers is True:
-                raw_response = (
-                    openai_client.audio.transcriptions.with_raw_response.create(
-                        **data, timeout=timeout
-                    )
-                )  # type: ignore
-                headers = dict(raw_response.headers)
-                response = raw_response.parse()
-                return headers, response
-            else:
-                response = openai_client.audio.transcriptions.create(**data, timeout=timeout)  # type: ignore
-                return None, response
-        except Exception as e:
-            raise e
-
-    def audio_transcriptions(
-        self,
-        model: str,
-        audio_file: BinaryIO,
-        optional_params: dict,
-        model_response: TranscriptionResponse,
-        timeout: float,
-        max_retries: int,
-        api_key: Optional[str],
-        api_base: Optional[str],
-        client=None,
-        logging_obj=None,
-        atranscription: bool = False,
-    ):
-        data = {"model": model, "file": audio_file, **optional_params}
-        if atranscription is True:
-            return self.async_audio_transcriptions(
-                audio_file=audio_file,
-                data=data,
-                model_response=model_response,
-                timeout=timeout,
-                api_key=api_key,
-                api_base=api_base,
-                client=client,
-                max_retries=max_retries,
-                logging_obj=logging_obj,
-            )
-
-        openai_client = self._get_openai_client(
-            is_async=False,
-            api_key=api_key,
-            api_base=api_base,
-            timeout=timeout,
-            max_retries=max_retries,
-        )
-        _, response = self.make_sync_openai_audio_transcriptions_request(
-            openai_client=openai_client,
-            data=data,
-            timeout=timeout,
-        )
-
-        if isinstance(response, BaseModel):
-            stringified_response = response.model_dump()
-        else:
-            stringified_response = TranscriptionResponse(text=response).model_dump()
-
-        ## LOGGING
-        logging_obj.post_call(
-            input=audio_file.name,
-            api_key=api_key,
-            additional_args={"complete_input_dict": data},
-            original_response=stringified_response,
-        )
-        hidden_params = {"model": "whisper-1", "custom_llm_provider": "openai"}
-        final_response = convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, hidden_params=hidden_params, response_type="audio_transcription")  # type: ignore
-        return final_response
-
-    async def async_audio_transcriptions(
-        self,
-        audio_file: BinaryIO,
-        data: dict,
-        model_response: TranscriptionResponse,
-        timeout: float,
-        logging_obj: LiteLLMLoggingObj,
-        api_key: Optional[str] = None,
-        api_base: Optional[str] = None,
-        client=None,
-        max_retries=None,
-    ):
-        try:
-            openai_aclient = self._get_openai_client(
-                is_async=True,
-                api_key=api_key,
-                api_base=api_base,
-                timeout=timeout,
-                max_retries=max_retries,
-                client=client,
-            )
-
-            headers, response = await self.make_openai_audio_transcriptions_request(
-                openai_aclient=openai_aclient,
-                data=data,
-                timeout=timeout,
-            )
-            logging_obj.model_call_details["response_headers"] = headers
-            if isinstance(response, BaseModel):
-                stringified_response = response.model_dump()
-            else:
-                stringified_response = TranscriptionResponse(text=response).model_dump()
-            ## LOGGING
-            logging_obj.post_call(
-                input=audio_file.name,
-                api_key=api_key,
-                additional_args={"complete_input_dict": data},
-                original_response=stringified_response,
-            )
-            hidden_params = {"model": "whisper-1", "custom_llm_provider": "openai"}
-            return convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, hidden_params=hidden_params, response_type="audio_transcription")  # type: ignore
-        except Exception as e:
-            ## LOGGING
-            logging_obj.post_call(
-                input=input,
-                api_key=api_key,
-                original_response=str(e),
-            )
-            raise e
-
     def audio_speech(
         self,
         model: str,
@@ -3228,7 +3056,7 @@ class OpenAIAssistantsAPI(BaseLLM):
         """
         Here's an example:
         ```
-        from litellm.llms.openai import OpenAIAssistantsAPI, MessageData
+        from litellm.llms.OpenAI.openai import OpenAIAssistantsAPI, MessageData

         # create thread
         message: MessageData = {"role": "user", "content": "Hey, how's it going?"}
diff --git a/litellm/llms/azure_text.py b/litellm/llms/azure_text.py
index fb6e4875e..9a8d462e5 100644
--- a/litellm/llms/azure_text.py
+++ b/litellm/llms/azure_text.py
@@ -1,7 +1,7 @@
 import json
 import types  # type: ignore
 import uuid
-from typing import Any, BinaryIO, Callable, Optional, Union
+from typing import Any, Callable, Optional, Union

 import httpx
 import requests
@@ -19,8 +19,8 @@ from litellm.utils import (
     convert_to_model_response_object,
 )

-from ..llms.openai import OpenAITextCompletion, OpenAITextCompletionConfig
 from .base import BaseLLM
+from .OpenAI.openai import OpenAITextCompletion, OpenAITextCompletionConfig
 from .prompt_templates.factory import custom_prompt, prompt_factory

 openai_text_completion_config = OpenAITextCompletionConfig()
diff --git a/litellm/llms/vertex_ai_and_google_ai_studio/context_caching/vertex_ai_context_caching.py b/litellm/llms/vertex_ai_and_google_ai_studio/context_caching/vertex_ai_context_caching.py
index d087d7212..a82da7ad8 100644
--- a/litellm/llms/vertex_ai_and_google_ai_studio/context_caching/vertex_ai_context_caching.py
+++ b/litellm/llms/vertex_ai_and_google_ai_studio/context_caching/vertex_ai_context_caching.py
@@ -7,7 +7,7 @@ import litellm
 from litellm.caching import Cache
 from litellm.litellm_core_utils.litellm_logging import Logging
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
-from litellm.llms.openai import AllMessageValues
+from litellm.llms.OpenAI.openai import AllMessageValues
 from litellm.types.llms.vertex_ai import (
     CachedContentListAllResponseBody,
     RequestBody,
diff --git a/litellm/llms/vertex_ai_and_google_ai_studio/text_to_speech/text_to_speech_handler.py b/litellm/llms/vertex_ai_and_google_ai_studio/text_to_speech/text_to_speech_handler.py
index 99ebfae1e..bc2424ecc 100644
--- a/litellm/llms/vertex_ai_and_google_ai_studio/text_to_speech/text_to_speech_handler.py
+++ b/litellm/llms/vertex_ai_and_google_ai_studio/text_to_speech/text_to_speech_handler.py
@@ -12,7 +12,7 @@ from litellm.llms.custom_httpx.http_handler import (
     _get_async_httpx_client,
     _get_httpx_client,
 )
-from litellm.llms.openai import HttpxBinaryResponseContent
+from litellm.llms.OpenAI.openai import HttpxBinaryResponseContent
 from litellm.llms.vertex_ai_and_google_ai_studio.gemini.vertex_and_google_ai_studio_gemini import (
     VertexLLM,
 )
diff --git a/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_partner_models/main.py b/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_partner_models/main.py
index 60c1fa607..69909765e 100644
--- a/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_partner_models/main.py
+++ b/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_partner_models/main.py
@@ -81,7 +81,7 @@ class VertexAIPartnerModels(BaseLLM):
             from google.cloud import aiplatform

             from litellm.llms.databricks import DatabricksChatCompletion
-            from litellm.llms.openai import OpenAIChatCompletion
+            from litellm.llms.OpenAI.openai import OpenAIChatCompletion
             from litellm.llms.text_completion_codestral import CodestralTextCompletion
             from litellm.llms.vertex_ai_and_google_ai_studio.gemini.vertex_and_google_ai_studio_gemini import (
                 VertexLLM,
             )
diff --git a/litellm/main.py b/litellm/main.py
index 9e7297e11..bb2c1c47f 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -22,18 +22,7 @@ import uuid
 from concurrent.futures import ThreadPoolExecutor
 from copy import deepcopy
 from functools import partial
-from typing import (
-    Any,
-    BinaryIO,
-    Callable,
-    Dict,
-    List,
-    Literal,
-    Mapping,
-    Optional,
-    Type,
-    Union,
-)
+from typing import Any, Callable, Dict, List, Literal, Mapping, Optional, Type, Union

 import dotenv
 import httpx
@@ -93,8 +82,9 @@ from .llms import (
 from .llms.AI21 import completion as ai21
 from .llms.anthropic.chat import AnthropicChatCompletion
 from .llms.anthropic.completion import AnthropicTextCompletion
-from .llms.azure import AzureChatCompletion, _check_dynamic_azure_params
 from .llms.azure_text import AzureTextCompletion
+from .llms.AzureOpenAI.audio_transcriptions import AzureAudioTranscription
+from .llms.AzureOpenAI.azure import AzureChatCompletion, _check_dynamic_azure_params
 from .llms.bedrock import image_generation as bedrock_image_generation  # type: ignore
 from .llms.bedrock.chat import BedrockConverseLLM, BedrockLLM
 from .llms.bedrock.embed.embedding import BedrockEmbedding
@@ -104,7 +94,8 @@ from .llms.cohere import embed as cohere_embed
 from .llms.custom_llm import CustomLLM, custom_chat_llm_router
 from .llms.databricks import DatabricksChatCompletion
 from .llms.huggingface_restapi import Huggingface
-from .llms.openai import OpenAIChatCompletion, OpenAITextCompletion
+from .llms.OpenAI.audio_transcriptions import OpenAIAudioTranscription
+from .llms.OpenAI.openai import OpenAIChatCompletion, OpenAITextCompletion
 from .llms.predibase import PredibaseChatCompletion
 from .llms.prompt_templates.factory import (
     custom_prompt,
@@ -146,6 +137,7 @@ from .types.llms.openai import HttpxBinaryResponseContent
 from .types.utils import (
     AdapterCompletionStreamWrapper,
     ChatCompletionMessageToolCall,
+    FileTypes,
     HiddenParams,
     all_litellm_params,
 )
@@ -169,11 +161,13 @@ from litellm.utils import (
 ####### ENVIRONMENT VARIABLES ###################
 openai_chat_completions = OpenAIChatCompletion()
 openai_text_completions = OpenAITextCompletion()
+openai_audio_transcriptions = OpenAIAudioTranscription()
 databricks_chat_completions = DatabricksChatCompletion()
 anthropic_chat_completions = AnthropicChatCompletion()
 anthropic_text_completions = AnthropicTextCompletion()
 azure_chat_completions = AzureChatCompletion()
 azure_text_completions = AzureTextCompletion()
+azure_audio_transcriptions = AzureAudioTranscription()
 huggingface = Huggingface()
 predibase_chat_completions = PredibaseChatCompletion()
 codestral_text_completions = CodestralTextCompletion()
@@ -4614,7 +4608,7 @@ async def atranscription(*args, **kwargs) -> TranscriptionResponse:
 @client
 def transcription(
     model: str,
-    file: BinaryIO,
+    file: FileTypes,
     ## OPTIONAL OPENAI PARAMS ##
     language: Optional[str] = None,
     prompt: Optional[str] = None,
@@ -4704,7 +4698,7 @@ def transcription(
             or get_secret("AZURE_API_KEY")
         )  # type: ignore

-        response = azure_chat_completions.audio_transcriptions(
+        response = azure_audio_transcriptions.audio_transcriptions(
             model=model,
             audio_file=file,
             optional_params=optional_params,
@@ -4738,7 +4732,7 @@
             or litellm.openai_key
             or get_secret("OPENAI_API_KEY")
         )  # type: ignore
-        response = openai_chat_completions.audio_transcriptions(
+        response = openai_audio_transcriptions.audio_transcriptions(
             model=model,
             audio_file=file,
             optional_params=optional_params,
diff --git a/litellm/router.py b/litellm/router.py
index 2743a36b9..233331e80 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -24,18 +24,7 @@ import traceback
 import uuid
 from collections import defaultdict
 from datetime import datetime
-from typing import (
-    Any,
-    BinaryIO,
-    Dict,
-    Iterable,
-    List,
-    Literal,
-    Optional,
-    Tuple,
-    TypedDict,
-    Union,
-)
+from typing import Any, Dict, Iterable, List, Literal, Optional, Tuple, TypedDict, Union

 import httpx
 import openai
@@ -48,7 +37,7 @@ from litellm.assistants.main import AssistantDeleted
 from litellm.caching import DualCache, InMemoryCache, RedisCache
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLogging
-from litellm.llms.azure import get_azure_ad_token_from_oidc
+from litellm.llms.AzureOpenAI.azure import get_azure_ad_token_from_oidc
 from litellm.router_strategy.least_busy import LeastBusyLoggingHandler
 from litellm.router_strategy.lowest_cost import LowestCostLoggingHandler
 from litellm.router_strategy.lowest_latency import LowestLatencyLoggingHandler
@@ -1342,7 +1331,7 @@ class Router:
                 self.fail_calls[model_name] += 1
             raise e

-    async def atranscription(self, file: BinaryIO, model: str, **kwargs):
+    async def atranscription(self, file: FileTypes, model: str, **kwargs):
         """
         Example Usage:

@@ -1386,7 +1375,7 @@ class Router:
             )
             raise e

-    async def _atranscription(self, file: BinaryIO, model: str, **kwargs):
+    async def _atranscription(self, file: FileTypes, model: str, **kwargs):
         try:
             verbose_router_logger.debug(
                 f"Inside _atranscription()- model: {model}; kwargs: {kwargs}"
diff --git a/litellm/router_utils/client_initalization_utils.py b/litellm/router_utils/client_initalization_utils.py
index 9d68891c4..4f750336e 100644
--- a/litellm/router_utils/client_initalization_utils.py
+++ b/litellm/router_utils/client_initalization_utils.py
@@ -8,7 +8,7 @@ import openai

 import litellm
 from litellm._logging import verbose_router_logger
-from litellm.llms.azure import get_azure_ad_token_from_oidc
+from litellm.llms.AzureOpenAI.azure import get_azure_ad_token_from_oidc
 from litellm.secret_managers.get_azure_ad_token_provider import (
     get_azure_ad_token_provider,
 )
@@ -337,7 +337,9 @@ def set_client(litellm_router_instance: LitellmRouter, model: dict):
                 azure_client_params["azure_ad_token_provider"] = (
                     azure_ad_token_provider
                 )
-                from litellm.llms.azure import select_azure_base_url_or_endpoint
+                from litellm.llms.AzureOpenAI.azure import (
+                    select_azure_base_url_or_endpoint,
+                )

                 # this decides if we should set azure_endpoint or base_url on Azure OpenAI Client
                 # required to support GPT-4 vision enhancements, since base_url needs to be set on Azure OpenAI Client
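With `file` widened from `BinaryIO` to `FileTypes` in `transcription()` and `Router.atranscription()`, transcription should accept the same input shapes as the OpenAI SDK. A small sketch (the file name is a placeholder):

```python
import litellm

# A binary file handle still works as before:
with open("speech.wav", "rb") as f:
    resp = litellm.transcription(model="whisper-1", file=f)

# FileTypes also allows a (filename, bytes) tuple, per the OpenAI SDK:
with open("speech.wav", "rb") as f:
    resp = litellm.transcription(model="whisper-1", file=("speech.wav", f.read()))

print(resp.text)
```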
diff --git a/litellm/tests/test_assistants.py b/litellm/tests/test_assistants.py
index c376eecc2..0806697d7 100644
--- a/litellm/tests/test_assistants.py
+++ b/litellm/tests/test_assistants.py
@@ -20,15 +20,15 @@ from typing_extensions import override

 import litellm
 from litellm import create_thread, get_thread
-from litellm.llms.openai import (
+from litellm.llms.OpenAI.openai import (
     AssistantEventHandler,
     AsyncAssistantEventHandler,
     AsyncCursorPage,
     MessageData,
     OpenAIAssistantsAPI,
 )
-from litellm.llms.openai import OpenAIMessage as Message
-from litellm.llms.openai import SyncCursorPage, Thread
+from litellm.llms.OpenAI.openai import OpenAIMessage as Message
+from litellm.llms.OpenAI.openai import SyncCursorPage, Thread

 """
 V0 Scope:
diff --git a/litellm/tests/test_audio_speech.py b/litellm/tests/test_audio_speech.py
index 2c710d37e..4e45b9953 100644
--- a/litellm/tests/test_audio_speech.py
+++ b/litellm/tests/test_audio_speech.py
@@ -60,7 +60,7 @@ async def test_audio_speech_litellm(sync_mode, model, api_base, api_key):
             optional_params={},
         )

-        from litellm.llms.openai import HttpxBinaryResponseContent
+        from litellm.llms.OpenAI.openai import HttpxBinaryResponseContent

         assert isinstance(response, HttpxBinaryResponseContent)
     else:
@@ -78,7 +78,7 @@ async def test_audio_speech_litellm(sync_mode, model, api_base, api_key):
             optional_params={},
         )

-        from litellm.llms.openai import HttpxBinaryResponseContent
+        from litellm.llms.OpenAI.openai import HttpxBinaryResponseContent

         assert isinstance(response, HttpxBinaryResponseContent)

@@ -115,7 +115,7 @@ async def test_audio_speech_router(mode):
         optional_params={},
     )

-    from litellm.llms.openai import HttpxBinaryResponseContent
+    from litellm.llms.OpenAI.openai import HttpxBinaryResponseContent

     assert isinstance(response, HttpxBinaryResponseContent)

@@ -146,7 +146,7 @@ async def test_audio_speech_litellm_vertex(sync_mode):

         from types import SimpleNamespace

-        from litellm.llms.openai import HttpxBinaryResponseContent
+        from litellm.llms.OpenAI.openai import HttpxBinaryResponseContent

         response.stream_to_file(speech_file_path)
diff --git a/litellm/tests/test_secret_manager.py b/litellm/tests/test_secret_manager.py
index a380e6287..397128ecb 100644
--- a/litellm/tests/test_secret_manager.py
+++ b/litellm/tests/test_secret_manager.py
@@ -16,7 +16,7 @@ sys.path.insert(
 )  # Adds the parent directory to the system path
 import pytest

-from litellm.llms.azure import get_azure_ad_token_from_oidc
+from litellm.llms.AzureOpenAI.azure import get_azure_ad_token_from_oidc
 from litellm.llms.bedrock.chat import BedrockConverseLLM, BedrockLLM
 from litellm.secret_managers.aws_secret_manager import load_aws_secret_manager
 from litellm.secret_managers.main import get_secret
diff --git a/litellm/tests/test_streaming.py b/litellm/tests/test_streaming.py
index 43313b7f7..772ed8a64 100644
--- a/litellm/tests/test_streaming.py
+++ b/litellm/tests/test_streaming.py
@@ -20,7 +20,7 @@ from litellm.utils import ModelResponseListIterator

 sys.path.insert(
     0, os.path.abspath("../..")
-)  # Adds the parent directory to the system path
+)  # Adds the parent directory to the system-path
 from dotenv import load_dotenv

 load_dotenv()
diff --git a/litellm/types/llms/openai.py b/litellm/types/llms/openai.py
index 788199c00..9d65fe87e 100644
--- a/litellm/types/llms/openai.py
+++ b/litellm/types/llms/openai.py
@@ -1,16 +1,5 @@
 from os import PathLike
-from typing import (
-    IO,
-    Any,
-    BinaryIO,
-    Iterable,
-    List,
-    Literal,
-    Mapping,
-    Optional,
-    Tuple,
-    Union,
-)
+from typing import IO, Any, Iterable, List, Literal, Mapping, Optional, Tuple, Union

 from openai._legacy_response import HttpxBinaryResponseContent
 from openai.lib.streaming._assistants import (
diff --git a/litellm/types/utils.py b/litellm/types/utils.py
index 19282d195..e9fe7d963 100644
--- a/litellm/types/utils.py
+++ b/litellm/types/utils.py
@@ -5,6 +5,7 @@ from enum import Enum
 from typing import Any, Dict, List, Literal, Optional, Tuple, Union

 from openai._models import BaseModel as OpenAIObject
+from openai.types.audio.transcription_create_params import FileTypes
 from openai.types.completion_usage import CompletionUsage
 from pydantic import ConfigDict, Field, PrivateAttr
 from typing_extensions import Callable, Dict, Required, TypedDict, override
diff --git a/litellm/utils.py b/litellm/utils.py
index 33d3a59a3..c362a7b5a 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -55,12 +55,10 @@ from tokenizers import Tokenizer
 import litellm
 import litellm._service_logger  # for storing API inputs, outputs, and metadata
 import litellm.litellm_core_utils
+import litellm.litellm_core_utils.audio_utils.utils
 import litellm.litellm_core_utils.json_validation_rule
 from litellm.caching import DualCache
-from litellm.litellm_core_utils.core_helpers import (
-    get_file_check_sum,
-    map_finish_reason,
-)
+from litellm.litellm_core_utils.core_helpers import map_finish_reason
 from litellm.litellm_core_utils.exception_mapping_utils import get_error_message
 from litellm.litellm_core_utils.get_llm_provider_logic import (
     _is_non_openai_azure_model,
@@ -86,6 +84,7 @@ from litellm.types.utils import (
     Delta,
     Embedding,
     EmbeddingResponse,
+    FileTypes,
     ImageResponse,
     Message,
     ModelInfo,
@@ -161,7 +160,6 @@ except Exception as e:
 from concurrent.futures import ThreadPoolExecutor
 from typing import (
     Any,
-    BinaryIO,
     Callable,
     Dict,
     Iterable,
@@ -566,14 +564,17 @@ def function_setup(
         call_type == CallTypes.atranscription.value
         or call_type == CallTypes.transcription.value
     ):
-        _file_name: BinaryIO = args[1] if len(args) > 1 else kwargs["file"]
-        file_checksum = get_file_check_sum(_file=_file_name)
-        file_name = _file_name.name
+        _file_obj: FileTypes = args[1] if len(args) > 1 else kwargs["file"]
+        file_checksum = (
+            litellm.litellm_core_utils.audio_utils.utils.get_audio_file_name(
+                file_obj=_file_obj
+            )
+        )
         if "metadata" in kwargs:
             kwargs["metadata"]["file_checksum"] = file_checksum
         else:
             kwargs["metadata"] = {"file_checksum": file_checksum}
-        messages = file_name
+        messages = file_checksum
     elif (
         call_type == CallTypes.aspeech.value or call_type == CallTypes.speech.value
     ):
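A sketch of the resulting behavior (values illustrative): `metadata["file_checksum"]` now holds the audio file's name from `get_audio_file_name` rather than a name+size checksum, with the key name presumably kept for backward compatibility.

```python
# Illustrative only: mirrors what function_setup now records for
# litellm.transcription()/atranscription() calls.
from litellm.litellm_core_utils.audio_utils.utils import get_audio_file_name

kwargs = {"metadata": {}}
with open("speech.wav", "rb") as _file_obj:  # hypothetical file
    kwargs["metadata"]["file_checksum"] = get_audio_file_name(_file_obj)

print(kwargs["metadata"])  # {'file_checksum': 'speech.wav'}
```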