use correct type hints for audio transcriptions

Ishaan Jaff 2024-09-05 09:12:27 -07:00
parent a9be7dd79b
commit 81ee1653af
13 changed files with 420 additions and 430 deletions
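The motivation for the change: the old hints used `typing.BinaryIO`, but the OpenAI SDK's transcription endpoint accepts the broader `FileTypes` union — a file-like object, raw bytes, or a `(filename, content)` tuple. A minimal sketch of the difference (illustrative, not part of the commit), assuming the `FileTypes` alias exported by `openai.types.audio.transcription_create_params`:

```python
import io

# FileTypes is the union the OpenAI SDK accepts for audio uploads; it is the
# alias this commit switches to (re-exported in types/utils.py below).
from openai.types.audio.transcription_create_params import FileTypes

def accepts(f: FileTypes) -> str:
    return type(f).__name__

accepts(io.BytesIO(b"\x00"))          # file-like object -- the only BinaryIO case
accepts(b"\x00\x01")                   # raw bytes
accepts(("speech.mp3", b"\x00\x01"))   # (filename, content) tuple
```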

View file

@@ -20,8 +20,8 @@ import httpx
import litellm
from litellm import client
-from litellm.llms.azure import AzureBatchesAPI
-from litellm.llms.openai import OpenAIBatchesAPI
+from litellm.llms.AzureOpenAI.azure import AzureBatchesAPI
+from litellm.llms.OpenAI.openai import OpenAIBatchesAPI
from litellm.secret_managers.main import get_secret
from litellm.types.llms.openai import (
Batch,

View file

@@ -17,7 +17,7 @@ import time
import traceback
from datetime import timedelta
from enum import Enum
-from typing import Any, BinaryIO, List, Literal, Optional, Union
+from typing import Any, List, Literal, Optional, Union
from openai._models import BaseModel as OpenAIObject

View file

@@ -1,9 +1,10 @@
# What is this?
## Helper utilities
import os
-from typing import BinaryIO, List, Literal, Optional, Tuple
+from typing import List, Literal, Optional, Tuple
from litellm._logging import verbose_logger
+from litellm.types.utils import FileTypes
def map_finish_reason(
@@ -88,18 +89,19 @@ def _get_parent_otel_span_from_kwargs(kwargs: Optional[dict] = None):
return None
-def get_file_check_sum(_file: BinaryIO):
+def get_audio_file_name(file_obj: FileTypes) -> str:
     """
-    Helper to safely get file checksum - used as a cache key
+    Safely get the name of a file-like object or return its string representation.
+
+    Args:
+        file_obj (Any): A file-like object or any other object.
+
+    Returns:
+        str: The name of the file if available, otherwise a string representation of the object.
     """
-    try:
-        file_descriptor = _file.fileno()
-        file_stat = os.fstat(file_descriptor)
-        file_size = str(file_stat.st_size)
-        file_checksum = _file.name + file_size
-        return file_checksum
-    except Exception as e:
-        verbose_logger.error(f"Error getting file_checksum: {(str(e))}")
-        file_checksum = _file.name
-        return file_checksum
-    return file_checksum
+    if hasattr(file_obj, "name"):
+        return getattr(file_obj, "name")
+    elif hasattr(file_obj, "__str__"):
+        return str(file_obj)
+    else:
+        return repr(file_obj)
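A quick sketch of the new helper's behavior (illustrative only; the `BytesIO` object stands in for a real audio file):

```python
import io

from litellm.litellm_core_utils.core_helpers import get_audio_file_name

buf = io.BytesIO(b"RIFF")
buf.name = "clip.wav"  # file objects opened from disk carry a .name attribute
assert get_audio_file_name(buf) == "clip.wav"
assert get_audio_file_name("clip.wav") == "clip.wav"  # no .name -> str() fallback
```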

View file

@@ -0,0 +1,192 @@
import uuid
from typing import Optional, Union
import httpx
from openai import AsyncAzureOpenAI, AzureOpenAI
from pydantic import BaseModel
import litellm
from litellm.litellm_core_utils.core_helpers import get_audio_file_name
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.types.utils import FileTypes
from litellm.utils import TranscriptionResponse, convert_to_model_response_object
from .azure import (
AzureChatCompletion,
get_azure_ad_token_from_oidc,
select_azure_base_url_or_endpoint,
)
class AzureAudioTranscription(AzureChatCompletion):
def audio_transcriptions(
self,
model: str,
audio_file: FileTypes,
optional_params: dict,
model_response: TranscriptionResponse,
timeout: float,
max_retries: int,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
api_version: Optional[str] = None,
client=None,
azure_ad_token: Optional[str] = None,
logging_obj=None,
atranscription: bool = False,
):
data = {"model": model, "file": audio_file, **optional_params}
# init AzureOpenAI Client
azure_client_params = {
"api_version": api_version,
"azure_endpoint": api_base,
"azure_deployment": model,
"timeout": timeout,
}
azure_client_params = select_azure_base_url_or_endpoint(
azure_client_params=azure_client_params
)
if api_key is not None:
azure_client_params["api_key"] = api_key
elif azure_ad_token is not None:
if azure_ad_token.startswith("oidc/"):
azure_ad_token = get_azure_ad_token_from_oidc(azure_ad_token)
azure_client_params["azure_ad_token"] = azure_ad_token
if max_retries is not None:
azure_client_params["max_retries"] = max_retries
if atranscription is True:
return self.async_audio_transcriptions(
audio_file=audio_file,
data=data,
model_response=model_response,
timeout=timeout,
api_key=api_key,
api_base=api_base,
client=client,
azure_client_params=azure_client_params,
max_retries=max_retries,
logging_obj=logging_obj,
)
if client is None:
azure_client = AzureOpenAI(http_client=litellm.client_session, **azure_client_params) # type: ignore
else:
azure_client = client
## LOGGING
logging_obj.pre_call(
input=f"audio_file_{uuid.uuid4()}",
api_key=azure_client.api_key,
additional_args={
"headers": {"Authorization": f"Bearer {azure_client.api_key}"},
"api_base": azure_client._base_url._uri_reference,
"atranscription": True,
"complete_input_dict": data,
},
)
response = azure_client.audio.transcriptions.create(
**data, timeout=timeout # type: ignore
)
if isinstance(response, BaseModel):
stringified_response = response.model_dump()
else:
stringified_response = TranscriptionResponse(text=response).model_dump()
## LOGGING
logging_obj.post_call(
input=get_audio_file_name(audio_file),
api_key=api_key,
additional_args={"complete_input_dict": data},
original_response=stringified_response,
)
hidden_params = {"model": "whisper-1", "custom_llm_provider": "azure"}
final_response = convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, hidden_params=hidden_params, response_type="audio_transcription") # type: ignore
return final_response
async def async_audio_transcriptions(
self,
audio_file: FileTypes,
data: dict,
model_response: TranscriptionResponse,
timeout: float,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
client=None,
azure_client_params=None,
max_retries=None,
logging_obj=None,
):
response = None
try:
if client is None:
async_azure_client = AsyncAzureOpenAI(
**azure_client_params,
http_client=litellm.aclient_session,
)
else:
async_azure_client = client
## LOGGING
logging_obj.pre_call(
input=f"audio_file_{uuid.uuid4()}",
api_key=async_azure_client.api_key,
additional_args={
"headers": {
"Authorization": f"Bearer {async_azure_client.api_key}"
},
"api_base": async_azure_client._base_url._uri_reference,
"atranscription": True,
"complete_input_dict": data,
},
)
raw_response = (
await async_azure_client.audio.transcriptions.with_raw_response.create(
**data, timeout=timeout
)
) # type: ignore
headers = dict(raw_response.headers)
response = raw_response.parse()
if isinstance(response, BaseModel):
stringified_response = response.model_dump()
else:
stringified_response = TranscriptionResponse(text=response).model_dump()
## LOGGING
logging_obj.post_call(
input=get_audio_file_name(audio_file),
api_key=api_key,
additional_args={
"headers": {
"Authorization": f"Bearer {async_azure_client.api_key}"
},
"api_base": async_azure_client._base_url._uri_reference,
"atranscription": True,
"complete_input_dict": data,
},
original_response=stringified_response,
)
hidden_params = {"model": "whisper-1", "custom_llm_provider": "azure"}
response = convert_to_model_response_object(
_response_headers=headers,
response_object=stringified_response,
model_response_object=model_response,
hidden_params=hidden_params,
response_type="audio_transcription",
) # type: ignore
return response
except Exception as e:
## LOGGING
logging_obj.post_call(
input=get_audio_file_name(audio_file),  # log the file name, not the input() builtin
api_key=api_key,
original_response=str(e),
)
raise e
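For context, this class is reached through `litellm.transcription()` with an `azure/` model prefix (see the `main.py` hunk further down). A hedged usage sketch; the deployment name, endpoint, version, and key are placeholders:

```python
import litellm

# Placeholders throughout -- a real Azure OpenAI whisper deployment is required.
response = litellm.transcription(
    model="azure/my-whisper-deployment",
    file=open("speech.mp3", "rb"),  # any FileTypes value should now type-check
    api_base="https://my-endpoint.openai.azure.com",
    api_version="2024-02-15-preview",
    api_key="azure-key-placeholder",
)
print(response.text)
```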

View file

@@ -4,17 +4,7 @@ import os
import time
import types
import uuid
-from typing import (
-    Any,
-    BinaryIO,
-    Callable,
-    Coroutine,
-    Iterable,
-    List,
-    Literal,
-    Optional,
-    Union,
-)
+from typing import Any, Callable, Coroutine, Iterable, List, Literal, Optional, Union
import httpx # type: ignore
import requests
@@ -27,6 +17,7 @@ from litellm import ImageResponse, OpenAIConfig
from litellm.caching import DualCache
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
+from litellm.types.utils import FileTypes
from litellm.utils import (
Choices,
CustomStreamWrapper,
@@ -39,7 +30,7 @@ from litellm.utils import (
modify_url,
)
-from ..types.llms.openai import (
+from ...types.llms.openai import (
Assistant,
AssistantEventHandler,
AssistantStreamManager,
@@ -63,7 +54,7 @@ from ..types.llms.openai import (
SyncCursorPage,
Thread,
)
-from .base import BaseLLM
+from ..base import BaseLLM
azure_ad_cache = DualCache()
@@ -1570,178 +1561,6 @@ class AzureChatCompletion(BaseLLM):
else:
raise AzureOpenAIError(status_code=500, message=str(e))
def audio_transcriptions(
self,
model: str,
audio_file: BinaryIO,
optional_params: dict,
model_response: TranscriptionResponse,
timeout: float,
max_retries: int,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
api_version: Optional[str] = None,
client=None,
azure_ad_token: Optional[str] = None,
logging_obj=None,
atranscription: bool = False,
):
data = {"model": model, "file": audio_file, **optional_params}
# init AzureOpenAI Client
azure_client_params = {
"api_version": api_version,
"azure_endpoint": api_base,
"azure_deployment": model,
"timeout": timeout,
}
azure_client_params = select_azure_base_url_or_endpoint(
azure_client_params=azure_client_params
)
if api_key is not None:
azure_client_params["api_key"] = api_key
elif azure_ad_token is not None:
if azure_ad_token.startswith("oidc/"):
azure_ad_token = get_azure_ad_token_from_oidc(azure_ad_token)
azure_client_params["azure_ad_token"] = azure_ad_token
if max_retries is not None:
azure_client_params["max_retries"] = max_retries
if atranscription is True:
return self.async_audio_transcriptions(
audio_file=audio_file,
data=data,
model_response=model_response,
timeout=timeout,
api_key=api_key,
api_base=api_base,
client=client,
azure_client_params=azure_client_params,
max_retries=max_retries,
logging_obj=logging_obj,
)
if client is None:
azure_client = AzureOpenAI(http_client=litellm.client_session, **azure_client_params) # type: ignore
else:
azure_client = client
## LOGGING
logging_obj.pre_call(
input=f"audio_file_{uuid.uuid4()}",
api_key=azure_client.api_key,
additional_args={
"headers": {"Authorization": f"Bearer {azure_client.api_key}"},
"api_base": azure_client._base_url._uri_reference,
"atranscription": True,
"complete_input_dict": data,
},
)
response = azure_client.audio.transcriptions.create(
**data, timeout=timeout # type: ignore
)
if isinstance(response, BaseModel):
stringified_response = response.model_dump()
else:
stringified_response = TranscriptionResponse(text=response).model_dump()
## LOGGING
logging_obj.post_call(
input=audio_file.name,
api_key=api_key,
additional_args={"complete_input_dict": data},
original_response=stringified_response,
)
hidden_params = {"model": "whisper-1", "custom_llm_provider": "azure"}
final_response = convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, hidden_params=hidden_params, response_type="audio_transcription") # type: ignore
return final_response
async def async_audio_transcriptions(
self,
audio_file: BinaryIO,
data: dict,
model_response: TranscriptionResponse,
timeout: float,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
client=None,
azure_client_params=None,
max_retries=None,
logging_obj=None,
):
response = None
try:
if client is None:
async_azure_client = AsyncAzureOpenAI(
**azure_client_params,
http_client=litellm.aclient_session,
)
else:
async_azure_client = client
## LOGGING
logging_obj.pre_call(
input=f"audio_file_{uuid.uuid4()}",
api_key=async_azure_client.api_key,
additional_args={
"headers": {
"Authorization": f"Bearer {async_azure_client.api_key}"
},
"api_base": async_azure_client._base_url._uri_reference,
"atranscription": True,
"complete_input_dict": data,
},
)
raw_response = (
await async_azure_client.audio.transcriptions.with_raw_response.create(
**data, timeout=timeout
)
) # type: ignore
headers = dict(raw_response.headers)
response = raw_response.parse()
if isinstance(response, BaseModel):
stringified_response = response.model_dump()
else:
stringified_response = TranscriptionResponse(text=response).model_dump()
## LOGGING
logging_obj.post_call(
input=audio_file.name,
api_key=api_key,
additional_args={
"headers": {
"Authorization": f"Bearer {async_azure_client.api_key}"
},
"api_base": async_azure_client._base_url._uri_reference,
"atranscription": True,
"complete_input_dict": data,
},
original_response=stringified_response,
)
hidden_params = {"model": "whisper-1", "custom_llm_provider": "azure"}
response = convert_to_model_response_object(
_response_headers=headers,
response_object=stringified_response,
model_response_object=model_response,
hidden_params=hidden_params,
response_type="audio_transcription",
) # type: ignore
return response
except Exception as e:
## LOGGING
logging_obj.post_call(
input=input,
api_key=api_key,
original_response=str(e),
)
raise e
def audio_speech(
self,
model: str,

View file

@@ -0,0 +1,177 @@
from typing import Optional, Union
import httpx
from openai import AsyncOpenAI, OpenAI
from pydantic import BaseModel
import litellm
from litellm.litellm_core_utils.core_helpers import get_audio_file_name
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.types.utils import FileTypes
from litellm.utils import TranscriptionResponse, convert_to_model_response_object
from .openai import OpenAIChatCompletion
class OpenAIAudioTranscription(OpenAIChatCompletion):
# Audio Transcriptions
async def make_openai_audio_transcriptions_request(
self,
openai_aclient: AsyncOpenAI,
data: dict,
timeout: Union[float, httpx.Timeout],
):
"""
Helper to:
- call openai_aclient.audio.transcriptions.with_raw_response when litellm.return_response_headers is True
- call openai_aclient.audio.transcriptions.create by default
"""
try:
if litellm.return_response_headers is True:
raw_response = (
await openai_aclient.audio.transcriptions.with_raw_response.create(
**data, timeout=timeout
)
) # type: ignore
headers = dict(raw_response.headers)
response = raw_response.parse()
return headers, response
else:
response = await openai_aclient.audio.transcriptions.create(**data, timeout=timeout) # type: ignore
return None, response
except Exception as e:
raise e
def make_sync_openai_audio_transcriptions_request(
self,
openai_client: OpenAI,
data: dict,
timeout: Union[float, httpx.Timeout],
):
"""
Helper to:
- call openai_client.audio.transcriptions.with_raw_response when litellm.return_response_headers is True
- call openai_client.audio.transcriptions.create by default
"""
try:
if litellm.return_response_headers is True:
raw_response = (
openai_client.audio.transcriptions.with_raw_response.create(
**data, timeout=timeout
)
) # type: ignore
headers = dict(raw_response.headers)
response = raw_response.parse()
return headers, response
else:
response = openai_client.audio.transcriptions.create(**data, timeout=timeout) # type: ignore
return None, response
except Exception as e:
raise e
def audio_transcriptions(
self,
model: str,
audio_file: FileTypes,
optional_params: dict,
model_response: TranscriptionResponse,
timeout: float,
max_retries: int,
api_key: Optional[str],
api_base: Optional[str],
client=None,
logging_obj=None,
atranscription: bool = False,
):
data = {"model": model, "file": audio_file, **optional_params}
if atranscription is True:
return self.async_audio_transcriptions(
audio_file=audio_file,
data=data,
model_response=model_response,
timeout=timeout,
api_key=api_key,
api_base=api_base,
client=client,
max_retries=max_retries,
logging_obj=logging_obj,
)
openai_client = self._get_openai_client(
is_async=False,
api_key=api_key,
api_base=api_base,
timeout=timeout,
max_retries=max_retries,
)
_, response = self.make_sync_openai_audio_transcriptions_request(
openai_client=openai_client,
data=data,
timeout=timeout,
)
if isinstance(response, BaseModel):
stringified_response = response.model_dump()
else:
stringified_response = TranscriptionResponse(text=response).model_dump()
## LOGGING
logging_obj.post_call(
input=get_audio_file_name(audio_file),
api_key=api_key,
additional_args={"complete_input_dict": data},
original_response=stringified_response,
)
hidden_params = {"model": "whisper-1", "custom_llm_provider": "openai"}
final_response = convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, hidden_params=hidden_params, response_type="audio_transcription") # type: ignore
return final_response
async def async_audio_transcriptions(
self,
audio_file: FileTypes,
data: dict,
model_response: TranscriptionResponse,
timeout: float,
logging_obj: LiteLLMLoggingObj,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
client=None,
max_retries=None,
):
try:
openai_aclient = self._get_openai_client(
is_async=True,
api_key=api_key,
api_base=api_base,
timeout=timeout,
max_retries=max_retries,
client=client,
)
headers, response = await self.make_openai_audio_transcriptions_request(
openai_aclient=openai_aclient,
data=data,
timeout=timeout,
)
logging_obj.model_call_details["response_headers"] = headers
if isinstance(response, BaseModel):
stringified_response = response.model_dump()
else:
stringified_response = TranscriptionResponse(text=response).model_dump()
## LOGGING
logging_obj.post_call(
input=get_audio_file_name(audio_file),
api_key=api_key,
additional_args={"complete_input_dict": data},
original_response=stringified_response,
)
hidden_params = {"model": "whisper-1", "custom_llm_provider": "openai"}
return convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, hidden_params=hidden_params, response_type="audio_transcription") # type: ignore
except Exception as e:
## LOGGING
logging_obj.post_call(
input=get_audio_file_name(audio_file),  # log the file name, not the input() builtin
api_key=api_key,
original_response=str(e),
)
raise e
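The two request helpers above differ only in which branch `litellm.return_response_headers` selects. A hedged sketch of driving the async helper directly (normally `litellm.transcription()` does this for you; the key and audio file are placeholders):

```python
import asyncio

import litellm
from openai import AsyncOpenAI
from litellm.llms.OpenAI.audio_transcriptions import OpenAIAudioTranscription

async def main() -> None:
    litellm.return_response_headers = True  # take the with_raw_response branch
    handler = OpenAIAudioTranscription()
    headers, response = await handler.make_openai_audio_transcriptions_request(
        openai_aclient=AsyncOpenAI(api_key="sk-placeholder"),
        data={"model": "whisper-1", "file": open("speech.mp3", "rb")},
        timeout=600.0,
    )
    print(headers)  # HTTP response headers as a dict, or None when the flag is off
    print(response.text)

asyncio.run(main())
```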

View file

@@ -4,16 +4,7 @@ import os
import time
import traceback
import types
-from typing import (
-    Any,
-    BinaryIO,
-    Callable,
-    Coroutine,
-    Iterable,
-    Literal,
-    Optional,
-    Union,
-)
+from typing import Any, Callable, Coroutine, Iterable, Literal, Optional, Union
import httpx
import openai
@@ -33,14 +24,13 @@ from litellm.utils import (
Message,
ModelResponse,
TextCompletionResponse,
-TranscriptionResponse,
Usage,
convert_to_model_response_object,
)
-from ..types.llms.openai import *
-from .base import BaseLLM
-from .prompt_templates.factory import custom_prompt, prompt_factory
+from ...types.llms.openai import *
+from ..base import BaseLLM
+from ..prompt_templates.factory import custom_prompt, prompt_factory
class OpenAIError(Exception):
@@ -1608,168 +1598,6 @@ class OpenAIChatCompletion(BaseLLM):
else:
raise OpenAIError(status_code=500, message=str(e))
# Audio Transcriptions
async def make_openai_audio_transcriptions_request(
self,
openai_aclient: AsyncOpenAI,
data: dict,
timeout: Union[float, httpx.Timeout],
):
"""
Helper to:
- call openai_aclient.audio.transcriptions.with_raw_response when litellm.return_response_headers is True
- call openai_aclient.audio.transcriptions.create by default
"""
try:
if litellm.return_response_headers is True:
raw_response = (
await openai_aclient.audio.transcriptions.with_raw_response.create(
**data, timeout=timeout
)
) # type: ignore
headers = dict(raw_response.headers)
response = raw_response.parse()
return headers, response
else:
response = await openai_aclient.audio.transcriptions.create(**data, timeout=timeout) # type: ignore
return None, response
except Exception as e:
raise e
def make_sync_openai_audio_transcriptions_request(
self,
openai_client: OpenAI,
data: dict,
timeout: Union[float, httpx.Timeout],
):
"""
Helper to:
- call openai_aclient.audio.transcriptions.with_raw_response when litellm.return_response_headers is True
- call openai_aclient.audio.transcriptions.create by default
"""
try:
if litellm.return_response_headers is True:
raw_response = (
openai_client.audio.transcriptions.with_raw_response.create(
**data, timeout=timeout
)
) # type: ignore
headers = dict(raw_response.headers)
response = raw_response.parse()
return headers, response
else:
response = openai_client.audio.transcriptions.create(**data, timeout=timeout) # type: ignore
return None, response
except Exception as e:
raise e
def audio_transcriptions(
self,
model: str,
audio_file: BinaryIO,
optional_params: dict,
model_response: TranscriptionResponse,
timeout: float,
max_retries: int,
api_key: Optional[str],
api_base: Optional[str],
client=None,
logging_obj=None,
atranscription: bool = False,
):
data = {"model": model, "file": audio_file, **optional_params}
if atranscription is True:
return self.async_audio_transcriptions(
audio_file=audio_file,
data=data,
model_response=model_response,
timeout=timeout,
api_key=api_key,
api_base=api_base,
client=client,
max_retries=max_retries,
logging_obj=logging_obj,
)
openai_client = self._get_openai_client(
is_async=False,
api_key=api_key,
api_base=api_base,
timeout=timeout,
max_retries=max_retries,
)
_, response = self.make_sync_openai_audio_transcriptions_request(
openai_client=openai_client,
data=data,
timeout=timeout,
)
if isinstance(response, BaseModel):
stringified_response = response.model_dump()
else:
stringified_response = TranscriptionResponse(text=response).model_dump()
## LOGGING
logging_obj.post_call(
input=audio_file.name,
api_key=api_key,
additional_args={"complete_input_dict": data},
original_response=stringified_response,
)
hidden_params = {"model": "whisper-1", "custom_llm_provider": "openai"}
final_response = convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, hidden_params=hidden_params, response_type="audio_transcription") # type: ignore
return final_response
async def async_audio_transcriptions(
self,
audio_file: BinaryIO,
data: dict,
model_response: TranscriptionResponse,
timeout: float,
logging_obj: LiteLLMLoggingObj,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
client=None,
max_retries=None,
):
try:
openai_aclient = self._get_openai_client(
is_async=True,
api_key=api_key,
api_base=api_base,
timeout=timeout,
max_retries=max_retries,
client=client,
)
headers, response = await self.make_openai_audio_transcriptions_request(
openai_aclient=openai_aclient,
data=data,
timeout=timeout,
)
logging_obj.model_call_details["response_headers"] = headers
if isinstance(response, BaseModel):
stringified_response = response.model_dump()
else:
stringified_response = TranscriptionResponse(text=response).model_dump()
## LOGGING
logging_obj.post_call(
input=audio_file.name,
api_key=api_key,
additional_args={"complete_input_dict": data},
original_response=stringified_response,
)
hidden_params = {"model": "whisper-1", "custom_llm_provider": "openai"}
return convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, hidden_params=hidden_params, response_type="audio_transcription") # type: ignore
except Exception as e:
## LOGGING
logging_obj.post_call(
input=input,
api_key=api_key,
original_response=str(e),
)
raise e
def audio_speech(
self,
model: str,

View file

@@ -1,7 +1,7 @@
import json
import types # type: ignore
import uuid
-from typing import Any, BinaryIO, Callable, Optional, Union
+from typing import Any, Callable, Optional, Union
import httpx
import requests
@@ -19,8 +19,8 @@ from litellm.utils import (
convert_to_model_response_object,
)
-from ..llms.openai import OpenAITextCompletion, OpenAITextCompletionConfig
from .base import BaseLLM
+from .OpenAI.openai import OpenAITextCompletion, OpenAITextCompletionConfig
from .prompt_templates.factory import custom_prompt, prompt_factory
openai_text_completion_config = OpenAITextCompletionConfig()

View file

@@ -22,18 +22,7 @@ import uuid
from concurrent.futures import ThreadPoolExecutor
from copy import deepcopy
from functools import partial
-from typing import (
-    Any,
-    BinaryIO,
-    Callable,
-    Dict,
-    List,
-    Literal,
-    Mapping,
-    Optional,
-    Type,
-    Union,
-)
+from typing import Any, Callable, Dict, List, Literal, Mapping, Optional, Type, Union
import dotenv
import httpx
@@ -93,8 +82,9 @@ from .llms import (
from .llms.AI21 import completion as ai21
from .llms.anthropic.chat import AnthropicChatCompletion
from .llms.anthropic.completion import AnthropicTextCompletion
-from .llms.azure import AzureChatCompletion, _check_dynamic_azure_params
from .llms.azure_text import AzureTextCompletion
+from .llms.AzureOpenAI.audio_transcriptions import AzureAudioTranscription
+from .llms.AzureOpenAI.azure import AzureChatCompletion, _check_dynamic_azure_params
from .llms.bedrock import image_generation as bedrock_image_generation # type: ignore
from .llms.bedrock.chat import BedrockConverseLLM, BedrockLLM
from .llms.bedrock.embed.embedding import BedrockEmbedding
@@ -104,7 +94,8 @@ from .llms.cohere import embed as cohere_embed
from .llms.custom_llm import CustomLLM, custom_chat_llm_router
from .llms.databricks import DatabricksChatCompletion
from .llms.huggingface_restapi import Huggingface
-from .llms.openai import OpenAIChatCompletion, OpenAITextCompletion
+from .llms.OpenAI.audio_transcriptions import OpenAIAudioTranscription
+from .llms.OpenAI.openai import OpenAIChatCompletion, OpenAITextCompletion
from .llms.predibase import PredibaseChatCompletion
from .llms.prompt_templates.factory import (
custom_prompt,
@@ -146,6 +137,7 @@ from .types.llms.openai import HttpxBinaryResponseContent
from .types.utils import (
AdapterCompletionStreamWrapper,
ChatCompletionMessageToolCall,
+FileTypes,
HiddenParams,
all_litellm_params,
)
@@ -169,11 +161,13 @@ from litellm.utils import (
####### ENVIRONMENT VARIABLES ###################
openai_chat_completions = OpenAIChatCompletion()
openai_text_completions = OpenAITextCompletion()
+openai_audio_transcriptions = OpenAIAudioTranscription()
databricks_chat_completions = DatabricksChatCompletion()
anthropic_chat_completions = AnthropicChatCompletion()
anthropic_text_completions = AnthropicTextCompletion()
azure_chat_completions = AzureChatCompletion()
azure_text_completions = AzureTextCompletion()
+azure_audio_transcriptions = AzureAudioTranscription()
huggingface = Huggingface()
predibase_chat_completions = PredibaseChatCompletion()
codestral_text_completions = CodestralTextCompletion()
@@ -4614,7 +4608,7 @@ async def atranscription(*args, **kwargs) -> TranscriptionResponse:
@client
def transcription(
model: str,
-file: BinaryIO,
+file: FileTypes,
## OPTIONAL OPENAI PARAMS ##
language: Optional[str] = None,
prompt: Optional[str] = None,
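With the public signature widened from `BinaryIO` to `FileTypes`, callers are no longer forced to pass an open binary stream; bytes or a `(filename, content)` tuple also type-check. A hedged sketch (placeholder file; whether every form works end-to-end still depends on the provider SDK):

```python
import litellm

audio = ("speech.mp3", open("speech.mp3", "rb").read())  # (filename, bytes) tuple
response = litellm.transcription(model="whisper-1", file=audio)
print(response.text)
```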
@@ -4704,7 +4698,7 @@
or get_secret("AZURE_API_KEY")
) # type: ignore
-response = azure_chat_completions.audio_transcriptions(
+response = azure_audio_transcriptions.audio_transcriptions(
model=model,
audio_file=file,
optional_params=optional_params,
@@ -4738,7 +4732,7 @@
or litellm.openai_key
or get_secret("OPENAI_API_KEY")
) # type: ignore
-response = openai_chat_completions.audio_transcriptions(
+response = openai_audio_transcriptions.audio_transcriptions(
model=model,
audio_file=file,
optional_params=optional_params,

View file

@@ -24,18 +24,7 @@ import traceback
import uuid
from collections import defaultdict
from datetime import datetime
-from typing import (
-    Any,
-    BinaryIO,
-    Dict,
-    Iterable,
-    List,
-    Literal,
-    Optional,
-    Tuple,
-    TypedDict,
-    Union,
-)
+from typing import Any, Dict, Iterable, List, Literal, Optional, Tuple, TypedDict, Union
import httpx
import openai
@@ -48,7 +37,7 @@ from litellm.assistants.main import AssistantDeleted
from litellm.caching import DualCache, InMemoryCache, RedisCache
from litellm.integrations.custom_logger import CustomLogger
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLogging
-from litellm.llms.azure import get_azure_ad_token_from_oidc
+from litellm.llms.AzureOpenAI.azure import get_azure_ad_token_from_oidc
from litellm.router_strategy.least_busy import LeastBusyLoggingHandler
from litellm.router_strategy.lowest_cost import LowestCostLoggingHandler
from litellm.router_strategy.lowest_latency import LowestLatencyLoggingHandler
@@ -1342,7 +1331,7 @@ class Router:
self.fail_calls[model_name] += 1
raise e
-async def atranscription(self, file: BinaryIO, model: str, **kwargs):
+async def atranscription(self, file: FileTypes, model: str, **kwargs):
"""
Example Usage:
@@ -1386,7 +1375,7 @@
)
raise e
-async def _atranscription(self, file: BinaryIO, model: str, **kwargs):
+async def _atranscription(self, file: FileTypes, model: str, **kwargs):
try:
verbose_router_logger.debug(
f"Inside _atranscription()- model: {model}; kwargs: {kwargs}"

View file

@@ -1,16 +1,5 @@
from os import PathLike
-from typing import (
-    IO,
-    Any,
-    BinaryIO,
-    Iterable,
-    List,
-    Literal,
-    Mapping,
-    Optional,
-    Tuple,
-    Union,
-)
+from typing import IO, Any, Iterable, List, Literal, Mapping, Optional, Tuple, Union
from openai._legacy_response import HttpxBinaryResponseContent
from openai.lib.streaming._assistants import (

View file

@@ -5,6 +5,7 @@ from enum import Enum
from typing import Any, Dict, List, Literal, Optional, Tuple, Union
from openai._models import BaseModel as OpenAIObject
+from openai.types.audio.transcription_create_params import FileTypes
from openai.types.completion_usage import CompletionUsage
from pydantic import ConfigDict, Field, PrivateAttr
from typing_extensions import Callable, Dict, Required, TypedDict, override
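`litellm.types.utils` now re-exports the SDK's alias, so the rest of the codebase can annotate against a single name. A sketch of a hypothetical consumer:

```python
from litellm.types.utils import FileTypes

def queue_transcription(file: FileTypes) -> None:  # hypothetical consumer
    """Accepts anything the OpenAI SDK can upload as an audio file."""
    ...
```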

View file

@@ -58,7 +58,7 @@ import litellm.litellm_core_utils
import litellm.litellm_core_utils.json_validation_rule
from litellm.caching import DualCache
from litellm.litellm_core_utils.core_helpers import (
-get_file_check_sum,
+get_audio_file_name,
map_finish_reason,
)
from litellm.litellm_core_utils.exception_mapping_utils import get_error_message
@@ -86,6 +86,7 @@ from litellm.types.utils import (
Delta,
Embedding,
EmbeddingResponse,
+FileTypes,
ImageResponse,
Message,
ModelInfo,
@@ -161,7 +162,6 @@ except Exception as e:
from concurrent.futures import ThreadPoolExecutor
from typing import (
Any,
-BinaryIO,
Callable,
Dict,
Iterable,
@@ -566,14 +566,13 @@ def function_setup(
call_type == CallTypes.atranscription.value
or call_type == CallTypes.transcription.value
):
-_file_name: BinaryIO = args[1] if len(args) > 1 else kwargs["file"]
-file_checksum = get_file_check_sum(_file=_file_name)
-file_name = _file_name.name
+_file_obj: FileTypes = args[1] if len(args) > 1 else kwargs["file"]
+file_checksum = get_audio_file_name(file_obj=_file_obj)
if "metadata" in kwargs:
kwargs["metadata"]["file_checksum"] = file_checksum
else:
kwargs["metadata"] = {"file_checksum": file_checksum}
-messages = file_name
+messages = _file_obj
elif (
call_type == CallTypes.aspeech.value or call_type == CallTypes.speech.value
):
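One behavioral note on the `function_setup` hunk above: the `metadata["file_checksum"]` cache key for transcription calls is now derived from the file's name alone, where the old helper combined name and size. A minimal sketch of the new key:

```python
import io

from litellm.litellm_core_utils.core_helpers import get_audio_file_name

f = io.BytesIO(b"\x00" * 16)
f.name = "speech.mp3"
metadata = {"file_checksum": get_audio_file_name(file_obj=f)}
assert metadata["file_checksum"] == "speech.mp3"  # name only, no size suffix
```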