Merge pull request #5534 from BerriAI/litellm_fix_transcription

[Fix] transcription/atranscription file parameter should accept correct types
Author: Ishaan Jaff
Date: 2024-09-05 18:41:36 -07:00 (committed by GitHub)
Commit: 8dbf372c6f
25 changed files with 471 additions and 458 deletions

View file

@@ -924,7 +924,7 @@ from .llms.bedrock.embed.amazon_titan_v2_transformation import (
AmazonTitanV2Config,
)
from .llms.bedrock.embed.cohere_transformation import BedrockCohereEmbeddingConfig
from .llms.openai import (
from .llms.OpenAI.openai import (
OpenAIConfig,
OpenAITextCompletionConfig,
MistralConfig,
@@ -939,7 +939,7 @@ from .llms.AI21.chat import AI21ChatConfig
from .llms.fireworks_ai import FireworksAIConfig
from .llms.volcengine import VolcEngineConfig
from .llms.text_completion_codestral import MistralTextCompletionConfig
from .llms.azure import (
from .llms.AzureOpenAI.azure import (
AzureOpenAIConfig,
AzureOpenAIError,
AzureOpenAIAssistantsAPIConfig,

View file

@@ -21,8 +21,8 @@ from litellm.utils import (
supports_httpx_timeout,
)
from ..llms.azure import AzureAssistantsAPI
from ..llms.openai import OpenAIAssistantsAPI
from ..llms.AzureOpenAI.azure import AzureAssistantsAPI
from ..llms.OpenAI.openai import OpenAIAssistantsAPI
from ..types.llms.openai import *
from ..types.router import *
from .utils import get_optional_params_add_message
@@ -184,6 +184,21 @@ def get_assistants(
request=httpx.Request(method="create_thread", url="https://github.com/BerriAI/litellm"), # type: ignore
),
)
if response is None:
raise litellm.exceptions.BadRequestError(
message="LiteLLM doesn't support {} for 'get_assistants'. Only 'openai' is supported.".format(
custom_llm_provider
),
model="n/a",
llm_provider=custom_llm_provider,
response=httpx.Response(
status_code=400,
content="Unsupported provider",
request=httpx.Request(method="create_thread", url="https://github.com/BerriAI/litellm"), # type: ignore
),
)
return response

View file

@@ -20,8 +20,8 @@ import httpx
import litellm
from litellm import client
from litellm.llms.azure import AzureBatchesAPI
from litellm.llms.openai import OpenAIBatchesAPI
from litellm.llms.AzureOpenAI.azure import AzureBatchesAPI
from litellm.llms.OpenAI.openai import OpenAIBatchesAPI
from litellm.secret_managers.main import get_secret
from litellm.types.llms.openai import (
Batch,

View file

@@ -17,7 +17,7 @@ import time
import traceback
from datetime import timedelta
from enum import Enum
from typing import Any, BinaryIO, List, Literal, Optional, Union
from typing import Any, List, Literal, Optional, Union
from openai._models import BaseModel as OpenAIObject

View file

@@ -16,7 +16,7 @@ import httpx
import litellm
from litellm import client, get_secret
from litellm.llms.files_apis.azure import AzureOpenAIFilesAPI
from litellm.llms.openai import FileDeleted, FileObject, OpenAIFilesAPI
from litellm.llms.OpenAI.openai import FileDeleted, FileObject, OpenAIFilesAPI
from litellm.types.llms.openai import (
Batch,
CreateFileRequest,

View file

@@ -0,0 +1,23 @@
"""
Utils used for litellm.transcription() and litellm.atranscription()
"""
from litellm.types.utils import FileTypes
def get_audio_file_name(file_obj: FileTypes) -> str:
"""
Safely get the name of a file-like object or return its string representation.
Args:
file_obj (Any): A file-like object or any other object.
Returns:
str: The name of the file if available, otherwise a string representation of the object.
"""
if hasattr(file_obj, "name"):
return getattr(file_obj, "name")
elif hasattr(file_obj, "__str__"):
return str(file_obj)
else:
return repr(file_obj)
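
For reference, a quick sketch of how the new helper resolves the different `FileTypes` inputs (the file names below are hypothetical):

```
from litellm.litellm_core_utils.audio_utils.utils import get_audio_file_name

# file-like object: exposes .name, so the name is returned
with open("speech.wav", "rb") as f:                  # hypothetical file
    print(get_audio_file_name(f))                    # -> "speech.wav"

# plain path string: no .name attribute, falls back to str()
print(get_audio_file_name("speech.wav"))             # -> "speech.wav"

# (filename, bytes) tuple: also falls back to str()
print(get_audio_file_name(("speech.wav", b"RIFF")))  # -> "('speech.wav', b'RIFF')"
```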

View file

@@ -1,7 +1,7 @@
# What is this?
## Helper utilities
import os
from typing import BinaryIO, List, Literal, Optional, Tuple
from typing import List, Literal, Optional, Tuple
from litellm._logging import verbose_logger
@@ -86,20 +86,3 @@ def _get_parent_otel_span_from_kwargs(kwargs: Optional[dict] = None):
return kwargs["litellm_parent_otel_span"]
except:
return None
def get_file_check_sum(_file: BinaryIO):
"""
Helper to safely get file checksum - used as a cache key
"""
try:
file_descriptor = _file.fileno()
file_stat = os.fstat(file_descriptor)
file_size = str(file_stat.st_size)
file_checksum = _file.name + file_size
return file_checksum
except Exception as e:
verbose_logger.error(f"Error getting file_checksum: {(str(e))}")
file_checksum = _file.name
return file_checksum
return file_checksum

View file

@@ -0,0 +1,192 @@
import uuid
from typing import Optional, Union
import httpx
from openai import AsyncAzureOpenAI, AzureOpenAI
from pydantic import BaseModel
import litellm
from litellm.litellm_core_utils.audio_utils.utils import get_audio_file_name
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.types.utils import FileTypes
from litellm.utils import TranscriptionResponse, convert_to_model_response_object
from .azure import (
AzureChatCompletion,
get_azure_ad_token_from_oidc,
select_azure_base_url_or_endpoint,
)
class AzureAudioTranscription(AzureChatCompletion):
def audio_transcriptions(
self,
model: str,
audio_file: FileTypes,
optional_params: dict,
model_response: TranscriptionResponse,
timeout: float,
max_retries: int,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
api_version: Optional[str] = None,
client=None,
azure_ad_token: Optional[str] = None,
logging_obj=None,
atranscription: bool = False,
):
data = {"model": model, "file": audio_file, **optional_params}
# init AzureOpenAI Client
azure_client_params = {
"api_version": api_version,
"azure_endpoint": api_base,
"azure_deployment": model,
"timeout": timeout,
}
azure_client_params = select_azure_base_url_or_endpoint(
azure_client_params=azure_client_params
)
if api_key is not None:
azure_client_params["api_key"] = api_key
elif azure_ad_token is not None:
if azure_ad_token.startswith("oidc/"):
azure_ad_token = get_azure_ad_token_from_oidc(azure_ad_token)
azure_client_params["azure_ad_token"] = azure_ad_token
if max_retries is not None:
azure_client_params["max_retries"] = max_retries
if atranscription is True:
return self.async_audio_transcriptions(
audio_file=audio_file,
data=data,
model_response=model_response,
timeout=timeout,
api_key=api_key,
api_base=api_base,
client=client,
azure_client_params=azure_client_params,
max_retries=max_retries,
logging_obj=logging_obj,
)
if client is None:
azure_client = AzureOpenAI(http_client=litellm.client_session, **azure_client_params) # type: ignore
else:
azure_client = client
## LOGGING
logging_obj.pre_call(
input=f"audio_file_{uuid.uuid4()}",
api_key=azure_client.api_key,
additional_args={
"headers": {"Authorization": f"Bearer {azure_client.api_key}"},
"api_base": azure_client._base_url._uri_reference,
"atranscription": True,
"complete_input_dict": data,
},
)
response = azure_client.audio.transcriptions.create(
**data, timeout=timeout # type: ignore
)
if isinstance(response, BaseModel):
stringified_response = response.model_dump()
else:
stringified_response = TranscriptionResponse(text=response).model_dump()
## LOGGING
logging_obj.post_call(
input=get_audio_file_name(audio_file),
api_key=api_key,
additional_args={"complete_input_dict": data},
original_response=stringified_response,
)
hidden_params = {"model": "whisper-1", "custom_llm_provider": "azure"}
final_response = convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, hidden_params=hidden_params, response_type="audio_transcription") # type: ignore
return final_response
async def async_audio_transcriptions(
self,
audio_file: FileTypes,
data: dict,
model_response: TranscriptionResponse,
timeout: float,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
client=None,
azure_client_params=None,
max_retries=None,
logging_obj=None,
):
response = None
try:
if client is None:
async_azure_client = AsyncAzureOpenAI(
**azure_client_params,
http_client=litellm.aclient_session,
)
else:
async_azure_client = client
## LOGGING
logging_obj.pre_call(
input=f"audio_file_{uuid.uuid4()}",
api_key=async_azure_client.api_key,
additional_args={
"headers": {
"Authorization": f"Bearer {async_azure_client.api_key}"
},
"api_base": async_azure_client._base_url._uri_reference,
"atranscription": True,
"complete_input_dict": data,
},
)
raw_response = (
await async_azure_client.audio.transcriptions.with_raw_response.create(
**data, timeout=timeout
)
) # type: ignore
headers = dict(raw_response.headers)
response = raw_response.parse()
if isinstance(response, BaseModel):
stringified_response = response.model_dump()
else:
stringified_response = TranscriptionResponse(text=response).model_dump()
## LOGGING
logging_obj.post_call(
input=get_audio_file_name(audio_file),
api_key=api_key,
additional_args={
"headers": {
"Authorization": f"Bearer {async_azure_client.api_key}"
},
"api_base": async_azure_client._base_url._uri_reference,
"atranscription": True,
"complete_input_dict": data,
},
original_response=stringified_response,
)
hidden_params = {"model": "whisper-1", "custom_llm_provider": "azure"}
response = convert_to_model_response_object(
_response_headers=headers,
response_object=stringified_response,
model_response_object=model_response,
hidden_params=hidden_params,
response_type="audio_transcription",
) # type: ignore
return response
except Exception as e:
## LOGGING
logging_obj.post_call(
input=input,
api_key=api_key,
original_response=str(e),
)
raise e
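
With the Azure path split out into `AzureAudioTranscription`, Azure transcriptions are routed through this module. A minimal caller-side sketch, assuming a hypothetical Whisper deployment and endpoint:

```
import litellm

with open("speech.mp3", "rb") as audio:                      # hypothetical file
    response = litellm.transcription(
        model="azure/my-whisper-deployment",                  # hypothetical deployment name
        file=audio,
        api_base="https://my-endpoint.openai.azure.com",      # hypothetical endpoint
        api_version="2024-02-15-preview",                     # hypothetical API version
        api_key="sk-...",                                     # placeholder key
    )
print(response.text)
```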

View file

@@ -4,17 +4,7 @@ import os
import time
import types
import uuid
from typing import (
Any,
BinaryIO,
Callable,
Coroutine,
Iterable,
List,
Literal,
Optional,
Union,
)
from typing import Any, Callable, Coroutine, Iterable, List, Literal, Optional, Union
import httpx # type: ignore
import requests
@@ -27,6 +17,7 @@ from litellm import ImageResponse, OpenAIConfig
from litellm.caching import DualCache
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
from litellm.types.utils import FileTypes
from litellm.utils import (
Choices,
CustomStreamWrapper,
@@ -39,7 +30,7 @@ from litellm.utils import (
modify_url,
)
from ..types.llms.openai import (
from ...types.llms.openai import (
Assistant,
AssistantEventHandler,
AssistantStreamManager,
@@ -63,7 +54,7 @@ from ..types.llms.openai import (
SyncCursorPage,
Thread,
)
from .base import BaseLLM
from ..base import BaseLLM
azure_ad_cache = DualCache()
@@ -1571,178 +1562,6 @@ class AzureChatCompletion(BaseLLM):
else:
raise AzureOpenAIError(status_code=500, message=str(e))
def audio_transcriptions(
self,
model: str,
audio_file: BinaryIO,
optional_params: dict,
model_response: TranscriptionResponse,
timeout: float,
max_retries: int,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
api_version: Optional[str] = None,
client=None,
azure_ad_token: Optional[str] = None,
logging_obj=None,
atranscription: bool = False,
):
data = {"model": model, "file": audio_file, **optional_params}
# init AzureOpenAI Client
azure_client_params = {
"api_version": api_version,
"azure_endpoint": api_base,
"azure_deployment": model,
"timeout": timeout,
}
azure_client_params = select_azure_base_url_or_endpoint(
azure_client_params=azure_client_params
)
if api_key is not None:
azure_client_params["api_key"] = api_key
elif azure_ad_token is not None:
if azure_ad_token.startswith("oidc/"):
azure_ad_token = get_azure_ad_token_from_oidc(azure_ad_token)
azure_client_params["azure_ad_token"] = azure_ad_token
if max_retries is not None:
azure_client_params["max_retries"] = max_retries
if atranscription is True:
return self.async_audio_transcriptions(
audio_file=audio_file,
data=data,
model_response=model_response,
timeout=timeout,
api_key=api_key,
api_base=api_base,
client=client,
azure_client_params=azure_client_params,
max_retries=max_retries,
logging_obj=logging_obj,
)
if client is None:
azure_client = AzureOpenAI(http_client=litellm.client_session, **azure_client_params) # type: ignore
else:
azure_client = client
## LOGGING
logging_obj.pre_call(
input=f"audio_file_{uuid.uuid4()}",
api_key=azure_client.api_key,
additional_args={
"headers": {"Authorization": f"Bearer {azure_client.api_key}"},
"api_base": azure_client._base_url._uri_reference,
"atranscription": True,
"complete_input_dict": data,
},
)
response = azure_client.audio.transcriptions.create(
**data, timeout=timeout # type: ignore
)
if isinstance(response, BaseModel):
stringified_response = response.model_dump()
else:
stringified_response = TranscriptionResponse(text=response).model_dump()
## LOGGING
logging_obj.post_call(
input=audio_file.name,
api_key=api_key,
additional_args={"complete_input_dict": data},
original_response=stringified_response,
)
hidden_params = {"model": "whisper-1", "custom_llm_provider": "azure"}
final_response = convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, hidden_params=hidden_params, response_type="audio_transcription") # type: ignore
return final_response
async def async_audio_transcriptions(
self,
audio_file: BinaryIO,
data: dict,
model_response: TranscriptionResponse,
timeout: float,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
client=None,
azure_client_params=None,
max_retries=None,
logging_obj=None,
):
response = None
try:
if client is None:
async_azure_client = AsyncAzureOpenAI(
**azure_client_params,
http_client=litellm.aclient_session,
)
else:
async_azure_client = client
## LOGGING
logging_obj.pre_call(
input=f"audio_file_{uuid.uuid4()}",
api_key=async_azure_client.api_key,
additional_args={
"headers": {
"Authorization": f"Bearer {async_azure_client.api_key}"
},
"api_base": async_azure_client._base_url._uri_reference,
"atranscription": True,
"complete_input_dict": data,
},
)
raw_response = (
await async_azure_client.audio.transcriptions.with_raw_response.create(
**data, timeout=timeout
)
) # type: ignore
headers = dict(raw_response.headers)
response = raw_response.parse()
if isinstance(response, BaseModel):
stringified_response = response.model_dump()
else:
stringified_response = TranscriptionResponse(text=response).model_dump()
## LOGGING
logging_obj.post_call(
input=audio_file.name,
api_key=api_key,
additional_args={
"headers": {
"Authorization": f"Bearer {async_azure_client.api_key}"
},
"api_base": async_azure_client._base_url._uri_reference,
"atranscription": True,
"complete_input_dict": data,
},
original_response=stringified_response,
)
hidden_params = {"model": "whisper-1", "custom_llm_provider": "azure"}
response = convert_to_model_response_object(
_response_headers=headers,
response_object=stringified_response,
model_response_object=model_response,
hidden_params=hidden_params,
response_type="audio_transcription",
) # type: ignore
return response
except Exception as e:
## LOGGING
logging_obj.post_call(
input=input,
api_key=api_key,
original_response=str(e),
)
raise e
def audio_speech(
self,
model: str,
@@ -2473,7 +2292,7 @@ class AzureAssistantsAPI(BaseLLM):
"""
Here's an example:
```
from litellm.llms.openai import OpenAIAssistantsAPI, MessageData
from litellm.llms.OpenAI.openai import OpenAIAssistantsAPI, MessageData
# create thread
message: MessageData = {"role": "user", "content": "Hey, how's it going?"}

View file

@@ -0,0 +1,177 @@
from typing import Optional, Union
import httpx
from openai import AsyncOpenAI, OpenAI
from pydantic import BaseModel
import litellm
from litellm.litellm_core_utils.audio_utils.utils import get_audio_file_name
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.types.utils import FileTypes
from litellm.utils import TranscriptionResponse, convert_to_model_response_object
from .openai import OpenAIChatCompletion
class OpenAIAudioTranscription(OpenAIChatCompletion):
# Audio Transcriptions
async def make_openai_audio_transcriptions_request(
self,
openai_aclient: AsyncOpenAI,
data: dict,
timeout: Union[float, httpx.Timeout],
):
"""
Helper to:
- call openai_aclient.audio.transcriptions.with_raw_response when litellm.return_response_headers is True
- call openai_aclient.audio.transcriptions.create by default
"""
try:
if litellm.return_response_headers is True:
raw_response = (
await openai_aclient.audio.transcriptions.with_raw_response.create(
**data, timeout=timeout
)
) # type: ignore
headers = dict(raw_response.headers)
response = raw_response.parse()
return headers, response
else:
response = await openai_aclient.audio.transcriptions.create(**data, timeout=timeout) # type: ignore
return None, response
except Exception as e:
raise e
def make_sync_openai_audio_transcriptions_request(
self,
openai_client: OpenAI,
data: dict,
timeout: Union[float, httpx.Timeout],
):
"""
Helper to:
- call openai_aclient.audio.transcriptions.with_raw_response when litellm.return_response_headers is True
- call openai_aclient.audio.transcriptions.create by default
"""
try:
if litellm.return_response_headers is True:
raw_response = (
openai_client.audio.transcriptions.with_raw_response.create(
**data, timeout=timeout
)
) # type: ignore
headers = dict(raw_response.headers)
response = raw_response.parse()
return headers, response
else:
response = openai_client.audio.transcriptions.create(**data, timeout=timeout) # type: ignore
return None, response
except Exception as e:
raise e
def audio_transcriptions(
self,
model: str,
audio_file: FileTypes,
optional_params: dict,
model_response: TranscriptionResponse,
timeout: float,
max_retries: int,
api_key: Optional[str],
api_base: Optional[str],
client=None,
logging_obj=None,
atranscription: bool = False,
):
data = {"model": model, "file": audio_file, **optional_params}
if atranscription is True:
return self.async_audio_transcriptions(
audio_file=audio_file,
data=data,
model_response=model_response,
timeout=timeout,
api_key=api_key,
api_base=api_base,
client=client,
max_retries=max_retries,
logging_obj=logging_obj,
)
openai_client = self._get_openai_client(
is_async=False,
api_key=api_key,
api_base=api_base,
timeout=timeout,
max_retries=max_retries,
)
_, response = self.make_sync_openai_audio_transcriptions_request(
openai_client=openai_client,
data=data,
timeout=timeout,
)
if isinstance(response, BaseModel):
stringified_response = response.model_dump()
else:
stringified_response = TranscriptionResponse(text=response).model_dump()
## LOGGING
logging_obj.post_call(
input=get_audio_file_name(audio_file),
api_key=api_key,
additional_args={"complete_input_dict": data},
original_response=stringified_response,
)
hidden_params = {"model": "whisper-1", "custom_llm_provider": "openai"}
final_response = convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, hidden_params=hidden_params, response_type="audio_transcription") # type: ignore
return final_response
async def async_audio_transcriptions(
self,
audio_file: FileTypes,
data: dict,
model_response: TranscriptionResponse,
timeout: float,
logging_obj: LiteLLMLoggingObj,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
client=None,
max_retries=None,
):
try:
openai_aclient = self._get_openai_client(
is_async=True,
api_key=api_key,
api_base=api_base,
timeout=timeout,
max_retries=max_retries,
client=client,
)
headers, response = await self.make_openai_audio_transcriptions_request(
openai_aclient=openai_aclient,
data=data,
timeout=timeout,
)
logging_obj.model_call_details["response_headers"] = headers
if isinstance(response, BaseModel):
stringified_response = response.model_dump()
else:
stringified_response = TranscriptionResponse(text=response).model_dump()
## LOGGING
logging_obj.post_call(
input=get_audio_file_name(audio_file),
api_key=api_key,
additional_args={"complete_input_dict": data},
original_response=stringified_response,
)
hidden_params = {"model": "whisper-1", "custom_llm_provider": "openai"}
return convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, hidden_params=hidden_params, response_type="audio_transcription") # type: ignore
except Exception as e:
## LOGGING
logging_obj.post_call(
input=input,
api_key=api_key,
original_response=str(e),
)
raise e

View file

@@ -4,16 +4,7 @@ import os
import time
import traceback
import types
from typing import (
Any,
BinaryIO,
Callable,
Coroutine,
Iterable,
Literal,
Optional,
Union,
)
from typing import Any, Callable, Coroutine, Iterable, Literal, Optional, Union
import httpx
import openai
@@ -33,14 +24,13 @@ from litellm.utils import (
Message,
ModelResponse,
TextCompletionResponse,
TranscriptionResponse,
Usage,
convert_to_model_response_object,
)
from ..types.llms.openai import *
from .base import BaseLLM
from .prompt_templates.factory import custom_prompt, prompt_factory
from ...types.llms.openai import *
from ..base import BaseLLM
from ..prompt_templates.factory import custom_prompt, prompt_factory
class OpenAIError(Exception):
@@ -1608,168 +1598,6 @@ class OpenAIChatCompletion(BaseLLM):
else:
raise OpenAIError(status_code=500, message=str(e))
# Audio Transcriptions
async def make_openai_audio_transcriptions_request(
self,
openai_aclient: AsyncOpenAI,
data: dict,
timeout: Union[float, httpx.Timeout],
):
"""
Helper to:
- call openai_aclient.audio.transcriptions.with_raw_response when litellm.return_response_headers is True
- call openai_aclient.audio.transcriptions.create by default
"""
try:
if litellm.return_response_headers is True:
raw_response = (
await openai_aclient.audio.transcriptions.with_raw_response.create(
**data, timeout=timeout
)
) # type: ignore
headers = dict(raw_response.headers)
response = raw_response.parse()
return headers, response
else:
response = await openai_aclient.audio.transcriptions.create(**data, timeout=timeout) # type: ignore
return None, response
except Exception as e:
raise e
def make_sync_openai_audio_transcriptions_request(
self,
openai_client: OpenAI,
data: dict,
timeout: Union[float, httpx.Timeout],
):
"""
Helper to:
- call openai_aclient.audio.transcriptions.with_raw_response when litellm.return_response_headers is True
- call openai_aclient.audio.transcriptions.create by default
"""
try:
if litellm.return_response_headers is True:
raw_response = (
openai_client.audio.transcriptions.with_raw_response.create(
**data, timeout=timeout
)
) # type: ignore
headers = dict(raw_response.headers)
response = raw_response.parse()
return headers, response
else:
response = openai_client.audio.transcriptions.create(**data, timeout=timeout) # type: ignore
return None, response
except Exception as e:
raise e
def audio_transcriptions(
self,
model: str,
audio_file: BinaryIO,
optional_params: dict,
model_response: TranscriptionResponse,
timeout: float,
max_retries: int,
api_key: Optional[str],
api_base: Optional[str],
client=None,
logging_obj=None,
atranscription: bool = False,
):
data = {"model": model, "file": audio_file, **optional_params}
if atranscription is True:
return self.async_audio_transcriptions(
audio_file=audio_file,
data=data,
model_response=model_response,
timeout=timeout,
api_key=api_key,
api_base=api_base,
client=client,
max_retries=max_retries,
logging_obj=logging_obj,
)
openai_client = self._get_openai_client(
is_async=False,
api_key=api_key,
api_base=api_base,
timeout=timeout,
max_retries=max_retries,
)
_, response = self.make_sync_openai_audio_transcriptions_request(
openai_client=openai_client,
data=data,
timeout=timeout,
)
if isinstance(response, BaseModel):
stringified_response = response.model_dump()
else:
stringified_response = TranscriptionResponse(text=response).model_dump()
## LOGGING
logging_obj.post_call(
input=audio_file.name,
api_key=api_key,
additional_args={"complete_input_dict": data},
original_response=stringified_response,
)
hidden_params = {"model": "whisper-1", "custom_llm_provider": "openai"}
final_response = convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, hidden_params=hidden_params, response_type="audio_transcription") # type: ignore
return final_response
async def async_audio_transcriptions(
self,
audio_file: BinaryIO,
data: dict,
model_response: TranscriptionResponse,
timeout: float,
logging_obj: LiteLLMLoggingObj,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
client=None,
max_retries=None,
):
try:
openai_aclient = self._get_openai_client(
is_async=True,
api_key=api_key,
api_base=api_base,
timeout=timeout,
max_retries=max_retries,
client=client,
)
headers, response = await self.make_openai_audio_transcriptions_request(
openai_aclient=openai_aclient,
data=data,
timeout=timeout,
)
logging_obj.model_call_details["response_headers"] = headers
if isinstance(response, BaseModel):
stringified_response = response.model_dump()
else:
stringified_response = TranscriptionResponse(text=response).model_dump()
## LOGGING
logging_obj.post_call(
input=audio_file.name,
api_key=api_key,
additional_args={"complete_input_dict": data},
original_response=stringified_response,
)
hidden_params = {"model": "whisper-1", "custom_llm_provider": "openai"}
return convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, hidden_params=hidden_params, response_type="audio_transcription") # type: ignore
except Exception as e:
## LOGGING
logging_obj.post_call(
input=input,
api_key=api_key,
original_response=str(e),
)
raise e
def audio_speech(
self,
model: str,
@@ -3228,7 +3056,7 @@ class OpenAIAssistantsAPI(BaseLLM):
"""
Here's an example:
```
from litellm.llms.openai import OpenAIAssistantsAPI, MessageData
from litellm.llms.OpenAI.openai import OpenAIAssistantsAPI, MessageData
# create thread
message: MessageData = {"role": "user", "content": "Hey, how's it going?"}

View file

@@ -1,7 +1,7 @@
import json
import types # type: ignore
import uuid
from typing import Any, BinaryIO, Callable, Optional, Union
from typing import Any, Callable, Optional, Union
import httpx
import requests
@@ -19,8 +19,8 @@ from litellm.utils import (
convert_to_model_response_object,
)
from ..llms.openai import OpenAITextCompletion, OpenAITextCompletionConfig
from .base import BaseLLM
from .OpenAI.openai import OpenAITextCompletion, OpenAITextCompletionConfig
from .prompt_templates.factory import custom_prompt, prompt_factory
openai_text_completion_config = OpenAITextCompletionConfig()

View file

@@ -7,7 +7,7 @@ import litellm
from litellm.caching import Cache
from litellm.litellm_core_utils.litellm_logging import Logging
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
from litellm.llms.openai import AllMessageValues
from litellm.llms.OpenAI.openai import AllMessageValues
from litellm.types.llms.vertex_ai import (
CachedContentListAllResponseBody,
RequestBody,

View file

@@ -12,7 +12,7 @@ from litellm.llms.custom_httpx.http_handler import (
_get_async_httpx_client,
_get_httpx_client,
)
from litellm.llms.openai import HttpxBinaryResponseContent
from litellm.llms.OpenAI.openai import HttpxBinaryResponseContent
from litellm.llms.vertex_ai_and_google_ai_studio.gemini.vertex_and_google_ai_studio_gemini import (
VertexLLM,
)

View file

@@ -81,7 +81,7 @@ class VertexAIPartnerModels(BaseLLM):
from google.cloud import aiplatform
from litellm.llms.databricks import DatabricksChatCompletion
from litellm.llms.openai import OpenAIChatCompletion
from litellm.llms.OpenAI.openai import OpenAIChatCompletion
from litellm.llms.text_completion_codestral import CodestralTextCompletion
from litellm.llms.vertex_ai_and_google_ai_studio.gemini.vertex_and_google_ai_studio_gemini import (
VertexLLM,

View file

@@ -22,18 +22,7 @@ import uuid
from concurrent.futures import ThreadPoolExecutor
from copy import deepcopy
from functools import partial
from typing import (
Any,
BinaryIO,
Callable,
Dict,
List,
Literal,
Mapping,
Optional,
Type,
Union,
)
from typing import Any, Callable, Dict, List, Literal, Mapping, Optional, Type, Union
import dotenv
import httpx
@@ -93,8 +82,9 @@ from .llms import (
from .llms.AI21 import completion as ai21
from .llms.anthropic.chat import AnthropicChatCompletion
from .llms.anthropic.completion import AnthropicTextCompletion
from .llms.azure import AzureChatCompletion, _check_dynamic_azure_params
from .llms.azure_text import AzureTextCompletion
from .llms.AzureOpenAI.audio_transcriptions import AzureAudioTranscription
from .llms.AzureOpenAI.azure import AzureChatCompletion, _check_dynamic_azure_params
from .llms.bedrock import image_generation as bedrock_image_generation # type: ignore
from .llms.bedrock.chat import BedrockConverseLLM, BedrockLLM
from .llms.bedrock.embed.embedding import BedrockEmbedding
@@ -104,7 +94,8 @@ from .llms.cohere import embed as cohere_embed
from .llms.custom_llm import CustomLLM, custom_chat_llm_router
from .llms.databricks import DatabricksChatCompletion
from .llms.huggingface_restapi import Huggingface
from .llms.openai import OpenAIChatCompletion, OpenAITextCompletion
from .llms.OpenAI.audio_transcriptions import OpenAIAudioTranscription
from .llms.OpenAI.openai import OpenAIChatCompletion, OpenAITextCompletion
from .llms.predibase import PredibaseChatCompletion
from .llms.prompt_templates.factory import (
custom_prompt,
@@ -146,6 +137,7 @@ from .types.llms.openai import HttpxBinaryResponseContent
from .types.utils import (
AdapterCompletionStreamWrapper,
ChatCompletionMessageToolCall,
FileTypes,
HiddenParams,
all_litellm_params,
)
@@ -169,11 +161,13 @@ from litellm.utils import (
####### ENVIRONMENT VARIABLES ###################
openai_chat_completions = OpenAIChatCompletion()
openai_text_completions = OpenAITextCompletion()
openai_audio_transcriptions = OpenAIAudioTranscription()
databricks_chat_completions = DatabricksChatCompletion()
anthropic_chat_completions = AnthropicChatCompletion()
anthropic_text_completions = AnthropicTextCompletion()
azure_chat_completions = AzureChatCompletion()
azure_text_completions = AzureTextCompletion()
azure_audio_transcriptions = AzureAudioTranscription()
huggingface = Huggingface()
predibase_chat_completions = PredibaseChatCompletion()
codestral_text_completions = CodestralTextCompletion()
@@ -4614,7 +4608,7 @@ async def atranscription(*args, **kwargs) -> TranscriptionResponse:
@client
def transcription(
model: str,
file: BinaryIO,
file: FileTypes,
## OPTIONAL OPENAI PARAMS ##
language: Optional[str] = None,
prompt: Optional[str] = None,
@@ -4704,7 +4698,7 @@ def transcription(
or get_secret("AZURE_API_KEY")
) # type: ignore
response = azure_chat_completions.audio_transcriptions(
response = azure_audio_transcriptions.audio_transcriptions(
model=model,
audio_file=file,
optional_params=optional_params,
@@ -4738,7 +4732,7 @@ def transcription(
or litellm.openai_key
or get_secret("OPENAI_API_KEY")
) # type: ignore
response = openai_chat_completions.audio_transcriptions(
response = openai_audio_transcriptions.audio_transcriptions(
model=model,
audio_file=file,
optional_params=optional_params,
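
The user-facing effect of the `file: FileTypes` annotation is that `litellm.transcription()` / `litellm.atranscription()` accept the same file inputs as the OpenAI SDK, roughly bytes, a binary file object, an `os.PathLike`, or a `(filename, content)` tuple. A minimal sketch, assuming an OpenAI key is set in the environment and using hypothetical file names:

```
import litellm

# a binary file object, as before
with open("speech.mp3", "rb") as audio:
    response = litellm.transcription(model="whisper-1", file=audio)

# a (filename, bytes) tuple also satisfies FileTypes
audio_bytes = open("speech.mp3", "rb").read()
response = litellm.transcription(model="whisper-1", file=("speech.mp3", audio_bytes))

print(response.text)
```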

View file

@@ -24,18 +24,7 @@ import traceback
import uuid
from collections import defaultdict
from datetime import datetime
from typing import (
Any,
BinaryIO,
Dict,
Iterable,
List,
Literal,
Optional,
Tuple,
TypedDict,
Union,
)
from typing import Any, Dict, Iterable, List, Literal, Optional, Tuple, TypedDict, Union
import httpx
import openai
@@ -48,7 +37,7 @@ from litellm.assistants.main import AssistantDeleted
from litellm.caching import DualCache, InMemoryCache, RedisCache
from litellm.integrations.custom_logger import CustomLogger
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLogging
from litellm.llms.azure import get_azure_ad_token_from_oidc
from litellm.llms.AzureOpenAI.azure import get_azure_ad_token_from_oidc
from litellm.router_strategy.least_busy import LeastBusyLoggingHandler
from litellm.router_strategy.lowest_cost import LowestCostLoggingHandler
from litellm.router_strategy.lowest_latency import LowestLatencyLoggingHandler
@@ -1342,7 +1331,7 @@ class Router:
self.fail_calls[model_name] += 1
raise e
async def atranscription(self, file: BinaryIO, model: str, **kwargs):
async def atranscription(self, file: FileTypes, model: str, **kwargs):
"""
Example Usage:
@@ -1386,7 +1375,7 @@
)
raise e
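
The Router's `atranscription` now carries the same `FileTypes` annotation, so routed calls accept the same inputs. A hedged sketch with a hypothetical deployment:

```
import asyncio

from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "whisper",
            "litellm_params": {"model": "whisper-1"},  # hypothetical deployment
        }
    ]
)

async def main():
    with open("speech.mp3", "rb") as audio:            # hypothetical file
        response = await router.atranscription(model="whisper", file=audio)
    print(response.text)

asyncio.run(main())
```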
async def _atranscription(self, file: BinaryIO, model: str, **kwargs):
async def _atranscription(self, file: FileTypes, model: str, **kwargs):
try:
verbose_router_logger.debug(
f"Inside _atranscription()- model: {model}; kwargs: {kwargs}"

View file

@@ -8,7 +8,7 @@ import openai
import litellm
from litellm._logging import verbose_router_logger
from litellm.llms.azure import get_azure_ad_token_from_oidc
from litellm.llms.AzureOpenAI.azure import get_azure_ad_token_from_oidc
from litellm.secret_managers.get_azure_ad_token_provider import (
get_azure_ad_token_provider,
)
@@ -337,7 +337,9 @@ def set_client(litellm_router_instance: LitellmRouter, model: dict):
azure_client_params["azure_ad_token_provider"] = (
azure_ad_token_provider
)
from litellm.llms.azure import select_azure_base_url_or_endpoint
from litellm.llms.AzureOpenAI.azure import (
select_azure_base_url_or_endpoint,
)
# this decides if we should set azure_endpoint or base_url on Azure OpenAI Client
# required to support GPT-4 vision enhancements, since base_url needs to be set on Azure OpenAI Client

View file

@@ -20,15 +20,15 @@ from typing_extensions import override
import litellm
from litellm import create_thread, get_thread
from litellm.llms.openai import (
from litellm.llms.OpenAI.openai import (
AssistantEventHandler,
AsyncAssistantEventHandler,
AsyncCursorPage,
MessageData,
OpenAIAssistantsAPI,
)
from litellm.llms.openai import OpenAIMessage as Message
from litellm.llms.openai import SyncCursorPage, Thread
from litellm.llms.OpenAI.openai import OpenAIMessage as Message
from litellm.llms.OpenAI.openai import SyncCursorPage, Thread
"""
V0 Scope:

View file

@@ -60,7 +60,7 @@ async def test_audio_speech_litellm(sync_mode, model, api_base, api_key):
optional_params={},
)
from litellm.llms.openai import HttpxBinaryResponseContent
from litellm.llms.OpenAI.openai import HttpxBinaryResponseContent
assert isinstance(response, HttpxBinaryResponseContent)
else:
@@ -78,7 +78,7 @@ async def test_audio_speech_litellm(sync_mode, model, api_base, api_key):
optional_params={},
)
from litellm.llms.openai import HttpxBinaryResponseContent
from litellm.llms.OpenAI.openai import HttpxBinaryResponseContent
assert isinstance(response, HttpxBinaryResponseContent)
@@ -115,7 +115,7 @@ async def test_audio_speech_router(mode):
optional_params={},
)
from litellm.llms.openai import HttpxBinaryResponseContent
from litellm.llms.OpenAI.openai import HttpxBinaryResponseContent
assert isinstance(response, HttpxBinaryResponseContent)
@@ -146,7 +146,7 @@ async def test_audio_speech_litellm_vertex(sync_mode):
from types import SimpleNamespace
from litellm.llms.openai import HttpxBinaryResponseContent
from litellm.llms.OpenAI.openai import HttpxBinaryResponseContent
response.stream_to_file(speech_file_path)

View file

@@ -16,7 +16,7 @@ sys.path.insert(
) # Adds the parent directory to the system path
import pytest
from litellm.llms.azure import get_azure_ad_token_from_oidc
from litellm.llms.AzureOpenAI.azure import get_azure_ad_token_from_oidc
from litellm.llms.bedrock.chat import BedrockConverseLLM, BedrockLLM
from litellm.secret_managers.aws_secret_manager import load_aws_secret_manager
from litellm.secret_managers.main import get_secret

View file

@@ -20,7 +20,7 @@ from litellm.utils import ModelResponseListIterator
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
) # Adds the parent directory to the system-path
from dotenv import load_dotenv
load_dotenv()

View file

@@ -1,16 +1,5 @@
from os import PathLike
from typing import (
IO,
Any,
BinaryIO,
Iterable,
List,
Literal,
Mapping,
Optional,
Tuple,
Union,
)
from typing import IO, Any, Iterable, List, Literal, Mapping, Optional, Tuple, Union
from openai._legacy_response import HttpxBinaryResponseContent
from openai.lib.streaming._assistants import (

View file

@@ -5,6 +5,7 @@ from enum import Enum
from typing import Any, Dict, List, Literal, Optional, Tuple, Union
from openai._models import BaseModel as OpenAIObject
from openai.types.audio.transcription_create_params import FileTypes
from openai.types.completion_usage import CompletionUsage
from pydantic import ConfigDict, Field, PrivateAttr
from typing_extensions import Callable, Dict, Required, TypedDict, override

View file

@@ -55,12 +55,10 @@ from tokenizers import Tokenizer
import litellm
import litellm._service_logger # for storing API inputs, outputs, and metadata
import litellm.litellm_core_utils
import litellm.litellm_core_utils.audio_utils.utils
import litellm.litellm_core_utils.json_validation_rule
from litellm.caching import DualCache
from litellm.litellm_core_utils.core_helpers import (
get_file_check_sum,
map_finish_reason,
)
from litellm.litellm_core_utils.core_helpers import map_finish_reason
from litellm.litellm_core_utils.exception_mapping_utils import get_error_message
from litellm.litellm_core_utils.get_llm_provider_logic import (
_is_non_openai_azure_model,
@@ -86,6 +84,7 @@ from litellm.types.utils import (
Delta,
Embedding,
EmbeddingResponse,
FileTypes,
ImageResponse,
Message,
ModelInfo,
@@ -161,7 +160,6 @@ from concurrent.futures import ThreadPoolExecutor
from concurrent.futures import ThreadPoolExecutor
from typing import (
Any,
BinaryIO,
Callable,
Dict,
Iterable,
@@ -566,14 +564,17 @@ def function_setup(
call_type == CallTypes.atranscription.value
or call_type == CallTypes.transcription.value
):
_file_name: BinaryIO = args[1] if len(args) > 1 else kwargs["file"]
file_checksum = get_file_check_sum(_file=_file_name)
file_name = _file_name.name
_file_obj: FileTypes = args[1] if len(args) > 1 else kwargs["file"]
file_checksum = (
litellm.litellm_core_utils.audio_utils.utils.get_audio_file_name(
file_obj=_file_obj
)
)
if "metadata" in kwargs:
kwargs["metadata"]["file_checksum"] = file_checksum
else:
kwargs["metadata"] = {"file_checksum": file_checksum}
messages = file_name
messages = file_checksum
elif (
call_type == CallTypes.aspeech.value or call_type == CallTypes.speech.value
):
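
For context, the pre-call setup now derives the transcription cache/logging key from `get_audio_file_name` instead of the removed `get_file_check_sum`, so it no longer depends on the file exposing `.fileno()` or `.name`. A rough illustration of the new key derivation (not the full `function_setup` logic; the kwargs are hypothetical):

```
from litellm.litellm_core_utils.audio_utils.utils import get_audio_file_name

kwargs = {"file": ("speech.mp3", b"..."), "metadata": {}}    # hypothetical call kwargs
file_checksum = get_audio_file_name(file_obj=kwargs["file"])
kwargs["metadata"]["file_checksum"] = file_checksum          # used as the cache/logging key
messages = file_checksum                                     # also recorded as the call "input"
```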