forked from phoenix/litellm-mirror
use correct type hints for audio transcriptions
This commit is contained in:
parent
a9be7dd79b
commit
81ee1653af
13 changed files with 420 additions and 430 deletions
|
@ -20,8 +20,8 @@ import httpx
|
|||
|
||||
import litellm
|
||||
from litellm import client
|
||||
from litellm.llms.azure import AzureBatchesAPI
|
||||
from litellm.llms.openai import OpenAIBatchesAPI
|
||||
from litellm.llms.AzureOpenAI.azure import AzureBatchesAPI
|
||||
from litellm.llms.OpenAI.openai import OpenAIBatchesAPI
|
||||
from litellm.secret_managers.main import get_secret
|
||||
from litellm.types.llms.openai import (
|
||||
Batch,
|
||||
|
|
|
@ -17,7 +17,7 @@ import time
|
|||
import traceback
|
||||
from datetime import timedelta
|
||||
from enum import Enum
|
||||
from typing import Any, BinaryIO, List, Literal, Optional, Union
|
||||
from typing import Any, List, Literal, Optional, Union
|
||||
|
||||
from openai._models import BaseModel as OpenAIObject
|
||||
|
||||
|
|
|
@ -1,9 +1,10 @@
|
|||
# What is this?
|
||||
## Helper utilities
|
||||
import os
|
||||
from typing import BinaryIO, List, Literal, Optional, Tuple
|
||||
from typing import List, Literal, Optional, Tuple
|
||||
|
||||
from litellm._logging import verbose_logger
|
||||
from litellm.types.utils import FileTypes
|
||||
|
||||
|
||||
def map_finish_reason(
|
||||
|
@ -88,18 +89,19 @@ def _get_parent_otel_span_from_kwargs(kwargs: Optional[dict] = None):
|
|||
return None
|
||||
|
||||
|
||||
def get_file_check_sum(_file: BinaryIO):
    """
    Helper to safely get file checksum - used as a cache key

    The key is the file's `name` followed by its size in bytes as reported by
    `os.fstat`; if anything in that lookup raises, the error is logged and the
    bare `name` is returned instead.
    """
    try:
        file_descriptor = _file.fileno()
        file_stat = os.fstat(file_descriptor)
        file_size = str(file_stat.st_size)
        file_checksum = _file.name + file_size
        return file_checksum
    except Exception as e:
        verbose_logger.error(f"Error getting file_checksum: {(str(e))}")
        file_checksum = _file.name
        return file_checksum


def get_audio_file_name(file_obj: FileTypes) -> str:
    """
    Safely get the name of a file-like object or return its string representation.

    Args:
        file_obj: A file-like object or any other object.

    Returns:
        str: `str(file_obj.name)` when the object has a `name` attribute that is
        not None, otherwise `str(file_obj)`.
    """
    name = getattr(file_obj, "name", None)
    if name is not None:
        # `name` is not guaranteed to be a str (for example a file object
        # opened from an integer file descriptor), so coerce it to honour
        # the declared return type.
        return str(name)
    # Every object supports str(); a separate repr() fallback can never run.
    return str(file_obj)
|
||||
|
|
192
litellm/llms/AzureOpenAI/audio_transcriptions.py
Normal file
192
litellm/llms/AzureOpenAI/audio_transcriptions.py
Normal file
|
@ -0,0 +1,192 @@
|
|||
import uuid
|
||||
from typing import Optional, Union
|
||||
|
||||
import httpx
|
||||
from openai import AsyncAzureOpenAI, AzureOpenAI
|
||||
from pydantic import BaseModel
|
||||
|
||||
import litellm
|
||||
from litellm.litellm_core_utils.core_helpers import get_audio_file_name
|
||||
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
|
||||
from litellm.types.utils import FileTypes
|
||||
from litellm.utils import TranscriptionResponse, convert_to_model_response_object
|
||||
|
||||
from .azure import (
|
||||
AzureChatCompletion,
|
||||
get_azure_ad_token_from_oidc,
|
||||
select_azure_base_url_or_endpoint,
|
||||
)
|
||||
|
||||
|
||||
class AzureAudioTranscription(AzureChatCompletion):
    """Audio transcription calls against Azure OpenAI, in sync and async form."""

    def audio_transcriptions(
        self,
        model: str,
        audio_file: FileTypes,
        optional_params: dict,
        model_response: TranscriptionResponse,
        timeout: float,
        max_retries: int,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
        api_version: Optional[str] = None,
        client=None,
        azure_ad_token: Optional[str] = None,
        logging_obj=None,
        atranscription: bool = False,
    ):
        """
        Transcribe `audio_file` through an Azure OpenAI deployment.

        Args:
            model: Sent as the request `model` and used as `azure_deployment`.
            audio_file: Forwarded as the request's `file` field.
            optional_params: Extra request fields merged into the payload.
            model_response: Object handed to `convert_to_model_response_object`.
            timeout: Given to the client constructor and to the request itself.
            max_retries: Added to the client params when it is not None.
            api_key: Azure API key; takes precedence over `azure_ad_token`.
            api_base: Azure endpoint.
            api_version: Azure API version.
            client: Pre-built client used instead of constructing a new one.
            azure_ad_token: AD token; a value starting with "oidc/" is first
                exchanged via `get_azure_ad_token_from_oidc`.
            logging_obj: Its `pre_call` / `post_call` are invoked
                unconditionally, so callers must supply it despite the default.
            atranscription: When True, the un-awaited coroutine from
                `async_audio_transcriptions` is returned.

        Returns:
            The value produced by `convert_to_model_response_object`, or a
            coroutine when `atranscription` is True.
        """
        data = {"model": model, "file": audio_file, **optional_params}

        # init AzureOpenAI Client
        azure_client_params = {
            "api_version": api_version,
            "azure_endpoint": api_base,
            "azure_deployment": model,
            "timeout": timeout,
        }

        azure_client_params = select_azure_base_url_or_endpoint(
            azure_client_params=azure_client_params
        )
        if api_key is not None:
            azure_client_params["api_key"] = api_key
        elif azure_ad_token is not None:
            if azure_ad_token.startswith("oidc/"):
                azure_ad_token = get_azure_ad_token_from_oidc(azure_ad_token)
            azure_client_params["azure_ad_token"] = azure_ad_token

        if max_retries is not None:
            azure_client_params["max_retries"] = max_retries

        if atranscription is True:
            return self.async_audio_transcriptions(
                audio_file=audio_file,
                data=data,
                model_response=model_response,
                timeout=timeout,
                api_key=api_key,
                api_base=api_base,
                client=client,
                azure_client_params=azure_client_params,
                max_retries=max_retries,
                logging_obj=logging_obj,
            )
        if client is None:
            azure_client = AzureOpenAI(http_client=litellm.client_session, **azure_client_params)  # type: ignore
        else:
            azure_client = client

        ## LOGGING
        # The logged input is a random placeholder id, not the file itself.
        logging_obj.pre_call(
            input=f"audio_file_{uuid.uuid4()}",
            api_key=azure_client.api_key,
            additional_args={
                "headers": {"Authorization": f"Bearer {azure_client.api_key}"},
                "api_base": azure_client._base_url._uri_reference,
                # NOTE(review): this is the synchronous path yet the flag is
                # logged as True - confirm whether consumers rely on it.
                "atranscription": True,
                "complete_input_dict": data,
            },
        )

        response = azure_client.audio.transcriptions.create(
            **data, timeout=timeout  # type: ignore
        )

        # Non-pydantic responses are wrapped as the `text` of a TranscriptionResponse.
        if isinstance(response, BaseModel):
            stringified_response = response.model_dump()
        else:
            stringified_response = TranscriptionResponse(text=response).model_dump()

        ## LOGGING
        logging_obj.post_call(
            input=get_audio_file_name(audio_file),
            api_key=api_key,
            additional_args={"complete_input_dict": data},
            original_response=stringified_response,
        )
        hidden_params = {"model": "whisper-1", "custom_llm_provider": "azure"}
        final_response = convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, hidden_params=hidden_params, response_type="audio_transcription")  # type: ignore
        return final_response

    async def async_audio_transcriptions(
        self,
        audio_file: FileTypes,
        data: dict,
        model_response: TranscriptionResponse,
        timeout: float,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
        client=None,
        azure_client_params=None,
        max_retries=None,
        logging_obj=None,
    ):
        """
        Async counterpart of `audio_transcriptions`.

        Uses `client` when given, otherwise builds an `AsyncAzureOpenAI` from
        `azure_client_params`. The request goes through `with_raw_response` so
        the response headers can be passed on to
        `convert_to_model_response_object`.

        Any exception is logged through `logging_obj.post_call` and re-raised.
        """
        try:
            if client is None:
                async_azure_client = AsyncAzureOpenAI(
                    **azure_client_params,
                    http_client=litellm.aclient_session,
                )
            else:
                async_azure_client = client

            ## LOGGING
            logging_obj.pre_call(
                input=f"audio_file_{uuid.uuid4()}",
                api_key=async_azure_client.api_key,
                additional_args={
                    "headers": {
                        "Authorization": f"Bearer {async_azure_client.api_key}"
                    },
                    "api_base": async_azure_client._base_url._uri_reference,
                    "atranscription": True,
                    "complete_input_dict": data,
                },
            )

            raw_response = (
                await async_azure_client.audio.transcriptions.with_raw_response.create(
                    **data, timeout=timeout
                )
            )  # type: ignore

            headers = dict(raw_response.headers)
            response = raw_response.parse()

            if isinstance(response, BaseModel):
                stringified_response = response.model_dump()
            else:
                stringified_response = TranscriptionResponse(text=response).model_dump()

            ## LOGGING
            logging_obj.post_call(
                input=get_audio_file_name(audio_file),
                api_key=api_key,
                additional_args={
                    "headers": {
                        "Authorization": f"Bearer {async_azure_client.api_key}"
                    },
                    "api_base": async_azure_client._base_url._uri_reference,
                    "atranscription": True,
                    "complete_input_dict": data,
                },
                original_response=stringified_response,
            )
            hidden_params = {"model": "whisper-1", "custom_llm_provider": "azure"}
            response = convert_to_model_response_object(
                _response_headers=headers,
                response_object=stringified_response,
                model_response_object=model_response,
                hidden_params=hidden_params,
                response_type="audio_transcription",
            )  # type: ignore
            return response
        except Exception as e:
            ## LOGGING
            # Log the file name, as the success path does; the bare name
            # `input` here would be Python's builtin function.
            logging_obj.post_call(
                input=get_audio_file_name(audio_file),
                api_key=api_key,
                original_response=str(e),
            )
            raise
|
|
@ -4,17 +4,7 @@ import os
|
|||
import time
|
||||
import types
|
||||
import uuid
|
||||
from typing import (
|
||||
Any,
|
||||
BinaryIO,
|
||||
Callable,
|
||||
Coroutine,
|
||||
Iterable,
|
||||
List,
|
||||
Literal,
|
||||
Optional,
|
||||
Union,
|
||||
)
|
||||
from typing import Any, Callable, Coroutine, Iterable, List, Literal, Optional, Union
|
||||
|
||||
import httpx # type: ignore
|
||||
import requests
|
||||
|
@ -27,6 +17,7 @@ from litellm import ImageResponse, OpenAIConfig
|
|||
from litellm.caching import DualCache
|
||||
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
|
||||
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
|
||||
from litellm.types.utils import FileTypes
|
||||
from litellm.utils import (
|
||||
Choices,
|
||||
CustomStreamWrapper,
|
||||
|
@ -39,7 +30,7 @@ from litellm.utils import (
|
|||
modify_url,
|
||||
)
|
||||
|
||||
from ..types.llms.openai import (
|
||||
from ...types.llms.openai import (
|
||||
Assistant,
|
||||
AssistantEventHandler,
|
||||
AssistantStreamManager,
|
||||
|
@ -63,7 +54,7 @@ from ..types.llms.openai import (
|
|||
SyncCursorPage,
|
||||
Thread,
|
||||
)
|
||||
from .base import BaseLLM
|
||||
from ..base import BaseLLM
|
||||
|
||||
azure_ad_cache = DualCache()
|
||||
|
||||
|
@ -1570,178 +1561,6 @@ class AzureChatCompletion(BaseLLM):
|
|||
else:
|
||||
raise AzureOpenAIError(status_code=500, message=str(e))
|
||||
|
||||
def audio_transcriptions(
    self,
    model: str,
    audio_file: BinaryIO,
    optional_params: dict,
    model_response: TranscriptionResponse,
    timeout: float,
    max_retries: int,
    api_key: Optional[str] = None,
    api_base: Optional[str] = None,
    api_version: Optional[str] = None,
    client=None,
    azure_ad_token: Optional[str] = None,
    logging_obj=None,
    atranscription: bool = False,
):
    """
    Run an audio transcription against an Azure OpenAI deployment.

    Builds the request payload from `model`, `audio_file` and
    `optional_params`, prepares the client constructor arguments, and then
    either hands off to `async_audio_transcriptions` (when `atranscription`
    is True, returning its un-awaited coroutine) or performs the call
    synchronously and returns the converted response.
    """
    request_payload = {"model": model, "file": audio_file, **optional_params}

    # Constructor arguments for the Azure client
    client_kwargs = select_azure_base_url_or_endpoint(
        azure_client_params={
            "api_version": api_version,
            "azure_endpoint": api_base,
            "azure_deployment": model,
            "timeout": timeout,
        }
    )

    # An API key wins over an AD token; "oidc/" tokens are exchanged first.
    if api_key is not None:
        client_kwargs["api_key"] = api_key
    elif azure_ad_token is not None:
        if azure_ad_token.startswith("oidc/"):
            azure_ad_token = get_azure_ad_token_from_oidc(azure_ad_token)
        client_kwargs["azure_ad_token"] = azure_ad_token

    if max_retries is not None:
        client_kwargs["max_retries"] = max_retries

    if atranscription is True:
        return self.async_audio_transcriptions(
            audio_file=audio_file,
            data=request_payload,
            model_response=model_response,
            timeout=timeout,
            api_key=api_key,
            api_base=api_base,
            client=client,
            azure_client_params=client_kwargs,
            max_retries=max_retries,
            logging_obj=logging_obj,
        )

    azure_client = (
        AzureOpenAI(http_client=litellm.client_session, **client_kwargs)  # type: ignore
        if client is None
        else client
    )

    ## LOGGING
    logging_obj.pre_call(
        input=f"audio_file_{uuid.uuid4()}",
        api_key=azure_client.api_key,
        additional_args={
            "headers": {"Authorization": f"Bearer {azure_client.api_key}"},
            "api_base": azure_client._base_url._uri_reference,
            "atranscription": True,
            "complete_input_dict": request_payload,
        },
    )

    raw_result = azure_client.audio.transcriptions.create(
        **request_payload, timeout=timeout  # type: ignore
    )

    # Non-pydantic results are wrapped as the `text` of a TranscriptionResponse.
    if not isinstance(raw_result, BaseModel):
        raw_result = TranscriptionResponse(text=raw_result)
    response_dict = raw_result.model_dump()

    ## LOGGING
    logging_obj.post_call(
        input=audio_file.name,
        api_key=api_key,
        additional_args={"complete_input_dict": request_payload},
        original_response=response_dict,
    )
    return convert_to_model_response_object(  # type: ignore
        response_object=response_dict,
        model_response_object=model_response,
        hidden_params={"model": "whisper-1", "custom_llm_provider": "azure"},
        response_type="audio_transcription",
    )
|
||||
|
||||
async def async_audio_transcriptions(
    self,
    audio_file: BinaryIO,
    data: dict,
    model_response: TranscriptionResponse,
    timeout: float,
    api_key: Optional[str] = None,
    api_base: Optional[str] = None,
    client=None,
    azure_client_params=None,
    max_retries=None,
    logging_obj=None,
):
    """
    Async audio transcription against Azure OpenAI.

    Uses `client` when given, otherwise builds an `AsyncAzureOpenAI` from
    `azure_client_params`. The request goes through `with_raw_response` so the
    response headers can be passed on to `convert_to_model_response_object`.

    Any exception is logged through `logging_obj.post_call` and re-raised.
    """
    try:
        if client is None:
            async_azure_client = AsyncAzureOpenAI(
                **azure_client_params,
                http_client=litellm.aclient_session,
            )
        else:
            async_azure_client = client

        ## LOGGING
        # The logged input is a random placeholder id, not the file itself.
        logging_obj.pre_call(
            input=f"audio_file_{uuid.uuid4()}",
            api_key=async_azure_client.api_key,
            additional_args={
                "headers": {
                    "Authorization": f"Bearer {async_azure_client.api_key}"
                },
                "api_base": async_azure_client._base_url._uri_reference,
                "atranscription": True,
                "complete_input_dict": data,
            },
        )

        raw_response = (
            await async_azure_client.audio.transcriptions.with_raw_response.create(
                **data, timeout=timeout
            )
        )  # type: ignore

        headers = dict(raw_response.headers)
        response = raw_response.parse()

        # Non-pydantic responses are wrapped as the `text` of a TranscriptionResponse.
        if isinstance(response, BaseModel):
            stringified_response = response.model_dump()
        else:
            stringified_response = TranscriptionResponse(text=response).model_dump()

        ## LOGGING
        logging_obj.post_call(
            input=audio_file.name,
            api_key=api_key,
            additional_args={
                "headers": {
                    "Authorization": f"Bearer {async_azure_client.api_key}"
                },
                "api_base": async_azure_client._base_url._uri_reference,
                "atranscription": True,
                "complete_input_dict": data,
            },
            original_response=stringified_response,
        )
        hidden_params = {"model": "whisper-1", "custom_llm_provider": "azure"}
        response = convert_to_model_response_object(
            _response_headers=headers,
            response_object=stringified_response,
            model_response_object=model_response,
            hidden_params=hidden_params,
            response_type="audio_transcription",
        )  # type: ignore
        return response
    except Exception as e:
        ## LOGGING
        # Log the file name rather than the builtin `input` function. The
        # getattr fallback keeps a missing `name` from masking the real error.
        logging_obj.post_call(
            input=getattr(audio_file, "name", str(audio_file)),
            api_key=api_key,
            original_response=str(e),
        )
        raise
|
||||
|
||||
def audio_speech(
|
||||
self,
|
||||
model: str,
|
177
litellm/llms/OpenAI/audio_transcriptions.py
Normal file
177
litellm/llms/OpenAI/audio_transcriptions.py
Normal file
|
@ -0,0 +1,177 @@
|
|||
from typing import Optional, Union
|
||||
|
||||
import httpx
|
||||
from openai import AsyncOpenAI, OpenAI
|
||||
from pydantic import BaseModel
|
||||
|
||||
import litellm
|
||||
from litellm.litellm_core_utils.core_helpers import get_audio_file_name
|
||||
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
|
||||
from litellm.types.utils import FileTypes
|
||||
from litellm.utils import TranscriptionResponse, convert_to_model_response_object
|
||||
|
||||
from .openai import OpenAIChatCompletion
|
||||
|
||||
|
||||
class OpenAIAudioTranscription(OpenAIChatCompletion):
    """Audio transcription calls against the OpenAI API, in sync and async form."""

    # Audio Transcriptions
    async def make_openai_audio_transcriptions_request(
        self,
        openai_aclient: AsyncOpenAI,
        data: dict,
        timeout: Union[float, httpx.Timeout],
    ):
        """
        Helper to:
        - call openai_aclient.audio.transcriptions.with_raw_response when litellm.return_response_headers is True
        - call openai_aclient.audio.transcriptions.create by default

        Returns:
            A `(headers, response)` tuple; `headers` is None on the default path.
        """
        if litellm.return_response_headers is True:
            raw_response = (
                await openai_aclient.audio.transcriptions.with_raw_response.create(
                    **data, timeout=timeout
                )
            )  # type: ignore
            headers = dict(raw_response.headers)
            response = raw_response.parse()
            return headers, response

        response = await openai_aclient.audio.transcriptions.create(**data, timeout=timeout)  # type: ignore
        return None, response

    def make_sync_openai_audio_transcriptions_request(
        self,
        openai_client: OpenAI,
        data: dict,
        timeout: Union[float, httpx.Timeout],
    ):
        """
        Helper to:
        - call openai_client.audio.transcriptions.with_raw_response when litellm.return_response_headers is True
        - call openai_client.audio.transcriptions.create by default

        Returns:
            A `(headers, response)` tuple; `headers` is None on the default path.
        """
        if litellm.return_response_headers is True:
            raw_response = (
                openai_client.audio.transcriptions.with_raw_response.create(
                    **data, timeout=timeout
                )
            )  # type: ignore
            headers = dict(raw_response.headers)
            response = raw_response.parse()
            return headers, response

        response = openai_client.audio.transcriptions.create(**data, timeout=timeout)  # type: ignore
        return None, response

    def audio_transcriptions(
        self,
        model: str,
        audio_file: FileTypes,
        optional_params: dict,
        model_response: TranscriptionResponse,
        timeout: float,
        max_retries: int,
        api_key: Optional[str],
        api_base: Optional[str],
        client=None,
        logging_obj=None,
        atranscription: bool = False,
    ):
        """
        Transcribe `audio_file` through the OpenAI API.

        Args:
            model: Sent as the request `model`.
            audio_file: Forwarded as the request's `file` field.
            optional_params: Extra request fields merged into the payload.
            model_response: Object handed to `convert_to_model_response_object`.
            timeout: Given to the client lookup and to the request itself.
            max_retries: Given to the client lookup.
            api_key: OpenAI API key.
            api_base: OpenAI API base URL.
            client: Pre-built client, forwarded to `_get_openai_client`.
            logging_obj: Its `post_call` is invoked unconditionally on the
                sync path, so callers must supply it despite the default.
            atranscription: When True, the un-awaited coroutine from
                `async_audio_transcriptions` is returned.

        Returns:
            The value produced by `convert_to_model_response_object`, or a
            coroutine when `atranscription` is True.
        """
        data = {"model": model, "file": audio_file, **optional_params}
        if atranscription is True:
            return self.async_audio_transcriptions(
                audio_file=audio_file,
                data=data,
                model_response=model_response,
                timeout=timeout,
                api_key=api_key,
                api_base=api_base,
                client=client,
                max_retries=max_retries,
                logging_obj=logging_obj,
            )

        # Forward the caller-supplied client, as the async path does;
        # otherwise it would be silently ignored here.
        openai_client = self._get_openai_client(
            is_async=False,
            api_key=api_key,
            api_base=api_base,
            timeout=timeout,
            max_retries=max_retries,
            client=client,
        )
        # Response headers are not used on the sync path.
        _, response = self.make_sync_openai_audio_transcriptions_request(
            openai_client=openai_client,
            data=data,
            timeout=timeout,
        )

        # Non-pydantic responses are wrapped as the `text` of a TranscriptionResponse.
        if isinstance(response, BaseModel):
            stringified_response = response.model_dump()
        else:
            stringified_response = TranscriptionResponse(text=response).model_dump()

        ## LOGGING
        logging_obj.post_call(
            input=get_audio_file_name(audio_file),
            api_key=api_key,
            additional_args={"complete_input_dict": data},
            original_response=stringified_response,
        )
        hidden_params = {"model": "whisper-1", "custom_llm_provider": "openai"}
        final_response = convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, hidden_params=hidden_params, response_type="audio_transcription")  # type: ignore
        return final_response

    async def async_audio_transcriptions(
        self,
        audio_file: FileTypes,
        data: dict,
        model_response: TranscriptionResponse,
        timeout: float,
        logging_obj: LiteLLMLoggingObj,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
        client=None,
        max_retries=None,
    ):
        """
        Async counterpart of `audio_transcriptions`.

        Obtains an async client via `_get_openai_client`, sends the request,
        stores the response headers (possibly None) on
        `logging_obj.model_call_details["response_headers"]`, and returns the
        converted response.

        Any exception is logged through `logging_obj.post_call` and re-raised.
        """
        try:
            openai_aclient = self._get_openai_client(
                is_async=True,
                api_key=api_key,
                api_base=api_base,
                timeout=timeout,
                max_retries=max_retries,
                client=client,
            )

            headers, response = await self.make_openai_audio_transcriptions_request(
                openai_aclient=openai_aclient,
                data=data,
                timeout=timeout,
            )
            logging_obj.model_call_details["response_headers"] = headers
            if isinstance(response, BaseModel):
                stringified_response = response.model_dump()
            else:
                stringified_response = TranscriptionResponse(text=response).model_dump()
            ## LOGGING
            logging_obj.post_call(
                input=get_audio_file_name(audio_file),
                api_key=api_key,
                additional_args={"complete_input_dict": data},
                original_response=stringified_response,
            )
            hidden_params = {"model": "whisper-1", "custom_llm_provider": "openai"}
            return convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, hidden_params=hidden_params, response_type="audio_transcription")  # type: ignore
        except Exception as e:
            ## LOGGING
            # Log the file name, as the success path does; the bare name
            # `input` here would be Python's builtin function.
            logging_obj.post_call(
                input=get_audio_file_name(audio_file),
                api_key=api_key,
                original_response=str(e),
            )
            raise
|
|
@ -4,16 +4,7 @@ import os
|
|||
import time
|
||||
import traceback
|
||||
import types
|
||||
from typing import (
|
||||
Any,
|
||||
BinaryIO,
|
||||
Callable,
|
||||
Coroutine,
|
||||
Iterable,
|
||||
Literal,
|
||||
Optional,
|
||||
Union,
|
||||
)
|
||||
from typing import Any, Callable, Coroutine, Iterable, Literal, Optional, Union
|
||||
|
||||
import httpx
|
||||
import openai
|
||||
|
@ -33,14 +24,13 @@ from litellm.utils import (
|
|||
Message,
|
||||
ModelResponse,
|
||||
TextCompletionResponse,
|
||||
TranscriptionResponse,
|
||||
Usage,
|
||||
convert_to_model_response_object,
|
||||
)
|
||||
|
||||
from ..types.llms.openai import *
|
||||
from .base import BaseLLM
|
||||
from .prompt_templates.factory import custom_prompt, prompt_factory
|
||||
from ...types.llms.openai import *
|
||||
from ..base import BaseLLM
|
||||
from ..prompt_templates.factory import custom_prompt, prompt_factory
|
||||
|
||||
|
||||
class OpenAIError(Exception):
|
||||
|
@ -1608,168 +1598,6 @@ class OpenAIChatCompletion(BaseLLM):
|
|||
else:
|
||||
raise OpenAIError(status_code=500, message=str(e))
|
||||
|
||||
# Audio Transcriptions
|
||||
async def make_openai_audio_transcriptions_request(
    self,
    openai_aclient: AsyncOpenAI,
    data: dict,
    timeout: Union[float, httpx.Timeout],
):
    """
    Helper to:
    - call openai_aclient.audio.transcriptions.with_raw_response when litellm.return_response_headers is True
    - call openai_aclient.audio.transcriptions.create by default

    Returns:
        A `(headers, response)` tuple; `headers` is None on the default path.
    """
    # Exceptions propagate unchanged; a try/except that only re-raised the
    # same exception added nothing.
    if litellm.return_response_headers is True:
        raw_response = (
            await openai_aclient.audio.transcriptions.with_raw_response.create(
                **data, timeout=timeout
            )
        )  # type: ignore
        headers = dict(raw_response.headers)
        response = raw_response.parse()
        return headers, response

    response = await openai_aclient.audio.transcriptions.create(**data, timeout=timeout)  # type: ignore
    return None, response
|
||||
|
||||
def make_sync_openai_audio_transcriptions_request(
    self,
    openai_client: OpenAI,
    data: dict,
    timeout: Union[float, httpx.Timeout],
):
    """
    Helper to:
    - call openai_client.audio.transcriptions.with_raw_response when litellm.return_response_headers is True
    - call openai_client.audio.transcriptions.create by default

    Returns:
        A `(headers, response)` tuple; `headers` is None on the default path.
    """
    # Exceptions propagate unchanged; a try/except that only re-raised the
    # same exception added nothing.
    if litellm.return_response_headers is True:
        raw_response = (
            openai_client.audio.transcriptions.with_raw_response.create(
                **data, timeout=timeout
            )
        )  # type: ignore
        headers = dict(raw_response.headers)
        response = raw_response.parse()
        return headers, response

    response = openai_client.audio.transcriptions.create(**data, timeout=timeout)  # type: ignore
    return None, response
|
||||
|
||||
def audio_transcriptions(
    self,
    model: str,
    audio_file: BinaryIO,
    optional_params: dict,
    model_response: TranscriptionResponse,
    timeout: float,
    max_retries: int,
    api_key: Optional[str],
    api_base: Optional[str],
    client=None,
    logging_obj=None,
    atranscription: bool = False,
):
    """
    Transcribe `audio_file` through the OpenAI API.

    Builds the payload from `model`, `audio_file` and `optional_params`.
    When `atranscription` is True the un-awaited coroutine from
    `async_audio_transcriptions` is returned; otherwise the request is made
    synchronously and the converted response is returned.

    `logging_obj.post_call` is invoked unconditionally on the sync path, so
    callers must supply `logging_obj` despite its default.
    """
    data = {"model": model, "file": audio_file, **optional_params}
    if atranscription is True:
        return self.async_audio_transcriptions(
            audio_file=audio_file,
            data=data,
            model_response=model_response,
            timeout=timeout,
            api_key=api_key,
            api_base=api_base,
            client=client,
            max_retries=max_retries,
            logging_obj=logging_obj,
        )

    # Forward the caller-supplied client, as the async path does; otherwise
    # it would be silently ignored here.
    openai_client = self._get_openai_client(
        is_async=False,
        api_key=api_key,
        api_base=api_base,
        timeout=timeout,
        max_retries=max_retries,
        client=client,
    )
    # Response headers are not used on the sync path.
    _, response = self.make_sync_openai_audio_transcriptions_request(
        openai_client=openai_client,
        data=data,
        timeout=timeout,
    )

    # Non-pydantic responses are wrapped as the `text` of a TranscriptionResponse.
    if isinstance(response, BaseModel):
        stringified_response = response.model_dump()
    else:
        stringified_response = TranscriptionResponse(text=response).model_dump()

    ## LOGGING
    logging_obj.post_call(
        input=audio_file.name,
        api_key=api_key,
        additional_args={"complete_input_dict": data},
        original_response=stringified_response,
    )
    hidden_params = {"model": "whisper-1", "custom_llm_provider": "openai"}
    final_response = convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, hidden_params=hidden_params, response_type="audio_transcription")  # type: ignore
    return final_response
|
||||
|
||||
async def async_audio_transcriptions(
    self,
    audio_file: BinaryIO,
    data: dict,
    model_response: TranscriptionResponse,
    timeout: float,
    logging_obj: LiteLLMLoggingObj,
    api_key: Optional[str] = None,
    api_base: Optional[str] = None,
    client=None,
    max_retries=None,
):
    """
    Async audio transcription through the OpenAI API.

    Obtains an async client via `_get_openai_client`, sends the request,
    stores the response headers (possibly None) on
    `logging_obj.model_call_details["response_headers"]`, and returns the
    converted response.

    Any exception is logged through `logging_obj.post_call` and re-raised.
    """
    try:
        openai_aclient = self._get_openai_client(
            is_async=True,
            api_key=api_key,
            api_base=api_base,
            timeout=timeout,
            max_retries=max_retries,
            client=client,
        )

        headers, response = await self.make_openai_audio_transcriptions_request(
            openai_aclient=openai_aclient,
            data=data,
            timeout=timeout,
        )
        logging_obj.model_call_details["response_headers"] = headers
        # Non-pydantic responses are wrapped as the `text` of a TranscriptionResponse.
        if isinstance(response, BaseModel):
            stringified_response = response.model_dump()
        else:
            stringified_response = TranscriptionResponse(text=response).model_dump()
        ## LOGGING
        logging_obj.post_call(
            input=audio_file.name,
            api_key=api_key,
            additional_args={"complete_input_dict": data},
            original_response=stringified_response,
        )
        hidden_params = {"model": "whisper-1", "custom_llm_provider": "openai"}
        return convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, hidden_params=hidden_params, response_type="audio_transcription")  # type: ignore
    except Exception as e:
        ## LOGGING
        # Log the file name rather than the builtin `input` function. The
        # getattr fallback keeps a missing `name` from masking the real error.
        logging_obj.post_call(
            input=getattr(audio_file, "name", str(audio_file)),
            api_key=api_key,
            original_response=str(e),
        )
        raise
|
||||
|
||||
def audio_speech(
|
||||
self,
|
||||
model: str,
|
|
@ -1,7 +1,7 @@
|
|||
import json
|
||||
import types # type: ignore
|
||||
import uuid
|
||||
from typing import Any, BinaryIO, Callable, Optional, Union
|
||||
from typing import Any, Callable, Optional, Union
|
||||
|
||||
import httpx
|
||||
import requests
|
||||
|
@ -19,8 +19,8 @@ from litellm.utils import (
|
|||
convert_to_model_response_object,
|
||||
)
|
||||
|
||||
from ..llms.openai import OpenAITextCompletion, OpenAITextCompletionConfig
|
||||
from .base import BaseLLM
|
||||
from .OpenAI.openai import OpenAITextCompletion, OpenAITextCompletionConfig
|
||||
from .prompt_templates.factory import custom_prompt, prompt_factory
|
||||
|
||||
openai_text_completion_config = OpenAITextCompletionConfig()
|
||||
|
|
|
@ -22,18 +22,7 @@ import uuid
|
|||
from concurrent.futures import ThreadPoolExecutor
|
||||
from copy import deepcopy
|
||||
from functools import partial
|
||||
from typing import (
|
||||
Any,
|
||||
BinaryIO,
|
||||
Callable,
|
||||
Dict,
|
||||
List,
|
||||
Literal,
|
||||
Mapping,
|
||||
Optional,
|
||||
Type,
|
||||
Union,
|
||||
)
|
||||
from typing import Any, Callable, Dict, List, Literal, Mapping, Optional, Type, Union
|
||||
|
||||
import dotenv
|
||||
import httpx
|
||||
|
@ -93,8 +82,9 @@ from .llms import (
|
|||
from .llms.AI21 import completion as ai21
|
||||
from .llms.anthropic.chat import AnthropicChatCompletion
|
||||
from .llms.anthropic.completion import AnthropicTextCompletion
|
||||
from .llms.azure import AzureChatCompletion, _check_dynamic_azure_params
|
||||
from .llms.azure_text import AzureTextCompletion
|
||||
from .llms.AzureOpenAI.audio_transcriptions import AzureAudioTranscription
|
||||
from .llms.AzureOpenAI.azure import AzureChatCompletion, _check_dynamic_azure_params
|
||||
from .llms.bedrock import image_generation as bedrock_image_generation # type: ignore
|
||||
from .llms.bedrock.chat import BedrockConverseLLM, BedrockLLM
|
||||
from .llms.bedrock.embed.embedding import BedrockEmbedding
|
||||
|
@ -104,7 +94,8 @@ from .llms.cohere import embed as cohere_embed
|
|||
from .llms.custom_llm import CustomLLM, custom_chat_llm_router
|
||||
from .llms.databricks import DatabricksChatCompletion
|
||||
from .llms.huggingface_restapi import Huggingface
|
||||
from .llms.openai import OpenAIChatCompletion, OpenAITextCompletion
|
||||
from .llms.OpenAI.audio_transcriptions import OpenAIAudioTranscription
|
||||
from .llms.OpenAI.openai import OpenAIChatCompletion, OpenAITextCompletion
|
||||
from .llms.predibase import PredibaseChatCompletion
|
||||
from .llms.prompt_templates.factory import (
|
||||
custom_prompt,
|
||||
|
@ -146,6 +137,7 @@ from .types.llms.openai import HttpxBinaryResponseContent
|
|||
from .types.utils import (
|
||||
AdapterCompletionStreamWrapper,
|
||||
ChatCompletionMessageToolCall,
|
||||
FileTypes,
|
||||
HiddenParams,
|
||||
all_litellm_params,
|
||||
)
|
||||
|
@ -169,11 +161,13 @@ from litellm.utils import (
|
|||
####### ENVIRONMENT VARIABLES ###################
|
||||
openai_chat_completions = OpenAIChatCompletion()
|
||||
openai_text_completions = OpenAITextCompletion()
|
||||
openai_audio_transcriptions = OpenAIAudioTranscription()
|
||||
databricks_chat_completions = DatabricksChatCompletion()
|
||||
anthropic_chat_completions = AnthropicChatCompletion()
|
||||
anthropic_text_completions = AnthropicTextCompletion()
|
||||
azure_chat_completions = AzureChatCompletion()
|
||||
azure_text_completions = AzureTextCompletion()
|
||||
azure_audio_transcriptions = AzureAudioTranscription()
|
||||
huggingface = Huggingface()
|
||||
predibase_chat_completions = PredibaseChatCompletion()
|
||||
codestral_text_completions = CodestralTextCompletion()
|
||||
|
@ -4614,7 +4608,7 @@ async def atranscription(*args, **kwargs) -> TranscriptionResponse:
|
|||
@client
|
||||
def transcription(
|
||||
model: str,
|
||||
file: BinaryIO,
|
||||
file: FileTypes,
|
||||
## OPTIONAL OPENAI PARAMS ##
|
||||
language: Optional[str] = None,
|
||||
prompt: Optional[str] = None,
|
||||
|
@ -4704,7 +4698,7 @@ def transcription(
|
|||
or get_secret("AZURE_API_KEY")
|
||||
) # type: ignore
|
||||
|
||||
response = azure_chat_completions.audio_transcriptions(
|
||||
response = azure_audio_transcriptions.audio_transcriptions(
|
||||
model=model,
|
||||
audio_file=file,
|
||||
optional_params=optional_params,
|
||||
|
@ -4738,7 +4732,7 @@ def transcription(
|
|||
or litellm.openai_key
|
||||
or get_secret("OPENAI_API_KEY")
|
||||
) # type: ignore
|
||||
response = openai_chat_completions.audio_transcriptions(
|
||||
response = openai_audio_transcriptions.audio_transcriptions(
|
||||
model=model,
|
||||
audio_file=file,
|
||||
optional_params=optional_params,
|
||||
|
|
|
@ -24,18 +24,7 @@ import traceback
|
|||
import uuid
|
||||
from collections import defaultdict
|
||||
from datetime import datetime
|
||||
from typing import (
|
||||
Any,
|
||||
BinaryIO,
|
||||
Dict,
|
||||
Iterable,
|
||||
List,
|
||||
Literal,
|
||||
Optional,
|
||||
Tuple,
|
||||
TypedDict,
|
||||
Union,
|
||||
)
|
||||
from typing import Any, Dict, Iterable, List, Literal, Optional, Tuple, TypedDict, Union
|
||||
|
||||
import httpx
|
||||
import openai
|
||||
|
@ -48,7 +37,7 @@ from litellm.assistants.main import AssistantDeleted
|
|||
from litellm.caching import DualCache, InMemoryCache, RedisCache
|
||||
from litellm.integrations.custom_logger import CustomLogger
|
||||
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLogging
|
||||
from litellm.llms.azure import get_azure_ad_token_from_oidc
|
||||
from litellm.llms.AzureOpenAI.azure import get_azure_ad_token_from_oidc
|
||||
from litellm.router_strategy.least_busy import LeastBusyLoggingHandler
|
||||
from litellm.router_strategy.lowest_cost import LowestCostLoggingHandler
|
||||
from litellm.router_strategy.lowest_latency import LowestLatencyLoggingHandler
|
||||
|
@ -1342,7 +1331,7 @@ class Router:
|
|||
self.fail_calls[model_name] += 1
|
||||
raise e
|
||||
|
||||
async def atranscription(self, file: BinaryIO, model: str, **kwargs):
|
||||
async def atranscription(self, file: FileTypes, model: str, **kwargs):
|
||||
"""
|
||||
Example Usage:
|
||||
|
||||
|
@ -1386,7 +1375,7 @@ class Router:
|
|||
)
|
||||
raise e
|
||||
|
||||
async def _atranscription(self, file: BinaryIO, model: str, **kwargs):
|
||||
async def _atranscription(self, file: FileTypes, model: str, **kwargs):
|
||||
try:
|
||||
verbose_router_logger.debug(
|
||||
f"Inside _atranscription()- model: {model}; kwargs: {kwargs}"
|
||||
|
|
|
@ -1,16 +1,5 @@
|
|||
from os import PathLike
|
||||
from typing import (
|
||||
IO,
|
||||
Any,
|
||||
BinaryIO,
|
||||
Iterable,
|
||||
List,
|
||||
Literal,
|
||||
Mapping,
|
||||
Optional,
|
||||
Tuple,
|
||||
Union,
|
||||
)
|
||||
from typing import IO, Any, Iterable, List, Literal, Mapping, Optional, Tuple, Union
|
||||
|
||||
from openai._legacy_response import HttpxBinaryResponseContent
|
||||
from openai.lib.streaming._assistants import (
|
||||
|
|
|
@ -5,6 +5,7 @@ from enum import Enum
|
|||
from typing import Any, Dict, List, Literal, Optional, Tuple, Union
|
||||
|
||||
from openai._models import BaseModel as OpenAIObject
|
||||
from openai.types.audio.transcription_create_params import FileTypes
|
||||
from openai.types.completion_usage import CompletionUsage
|
||||
from pydantic import ConfigDict, Field, PrivateAttr
|
||||
from typing_extensions import Callable, Dict, Required, TypedDict, override
|
||||
|
|
|
@ -58,7 +58,7 @@ import litellm.litellm_core_utils
|
|||
import litellm.litellm_core_utils.json_validation_rule
|
||||
from litellm.caching import DualCache
|
||||
from litellm.litellm_core_utils.core_helpers import (
|
||||
get_file_check_sum,
|
||||
get_audio_file_name,
|
||||
map_finish_reason,
|
||||
)
|
||||
from litellm.litellm_core_utils.exception_mapping_utils import get_error_message
|
||||
|
@ -86,6 +86,7 @@ from litellm.types.utils import (
|
|||
Delta,
|
||||
Embedding,
|
||||
EmbeddingResponse,
|
||||
FileTypes,
|
||||
ImageResponse,
|
||||
Message,
|
||||
ModelInfo,
|
||||
|
@ -161,7 +162,6 @@ except Exception as e:
|
|||
from concurrent.futures import ThreadPoolExecutor
|
||||
from typing import (
|
||||
Any,
|
||||
BinaryIO,
|
||||
Callable,
|
||||
Dict,
|
||||
Iterable,
|
||||
|
@ -566,14 +566,13 @@ def function_setup(
|
|||
call_type == CallTypes.atranscription.value
|
||||
or call_type == CallTypes.transcription.value
|
||||
):
|
||||
_file_name: BinaryIO = args[1] if len(args) > 1 else kwargs["file"]
|
||||
file_checksum = get_file_check_sum(_file=_file_name)
|
||||
file_name = _file_name.name
|
||||
_file_obj: FileTypes = args[1] if len(args) > 1 else kwargs["file"]
|
||||
file_checksum = get_audio_file_name(file_obj=_file_obj)
|
||||
if "metadata" in kwargs:
|
||||
kwargs["metadata"]["file_checksum"] = file_checksum
|
||||
else:
|
||||
kwargs["metadata"] = {"file_checksum": file_checksum}
|
||||
messages = file_name
|
||||
messages = _file_obj
|
||||
elif (
|
||||
call_type == CallTypes.aspeech.value or call_type == CallTypes.speech.value
|
||||
):
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue