mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-26 03:04:13 +00:00
Add OpenAI gpt-4o-transcribe support (#9517)
* refactor: introduce new transformation config for gpt-4o-transcribe models
* refactor: expose new transformation configs for audio transcription
* ci: fix config yml
* feat(openai/transcriptions): support provider config transformation on openai audio transcriptions
  allows gpt-4o and whisper audio transformation to work as expected
* refactor: migrate fireworks ai + deepgram to new transform request pattern
* feat(openai/): working support for gpt-4o-audio-transcribe
* build(model_prices_and_context_window.json): add gpt-4o-transcribe to model cost map
* build(model_prices_and_context_window.json): specify what endpoints are supported for `/audio/transcriptions`
* fix(get_supported_openai_params.py): fix return
* refactor(deepgram/): migrate unit test to deepgram handler
* refactor: cleanup unused imports
* fix(get_supported_openai_params.py): fix linting error
* test: update test
This commit is contained in:
parent
109add7946
commit
c0845fec1f
20 changed files with 402 additions and 92 deletions
|
@ -1,4 +1,3 @@
|
|||
import io
|
||||
import json
|
||||
from typing import TYPE_CHECKING, Any, Coroutine, Dict, Optional, Tuple, Union
|
||||
|
||||
|
@ -8,6 +7,9 @@ import litellm
|
|||
import litellm.litellm_core_utils
|
||||
import litellm.types
|
||||
import litellm.types.utils
|
||||
from litellm.llms.base_llm.audio_transcription.transformation import (
|
||||
BaseAudioTranscriptionConfig,
|
||||
)
|
||||
from litellm.llms.base_llm.chat.transformation import BaseConfig
|
||||
from litellm.llms.base_llm.embedding.transformation import BaseEmbeddingConfig
|
||||
from litellm.llms.base_llm.rerank.transformation import BaseRerankConfig
|
||||
|
@ -852,54 +854,12 @@ class BaseLLMHTTPHandler:
|
|||
request_data=request_data,
|
||||
)
|
||||
|
||||
def handle_audio_file(self, audio_file: "FileTypes") -> bytes:
    """
    Processes the audio file input based on its type and returns the binary data.

    Args:
        audio_file: One of
            - a file path (``str`` or ``os.PathLike``),
            - binary data (``bytes``, ``bytearray`` or ``memoryview``),
            - a binary file-like object exposing ``read()``,
            - a tuple whose second item is any of the above, e.g.
              ``(filename, file_content)``; items after the second are ignored.

    Returns:
        The binary data of the audio file.

    Raises:
        TypeError: If ``audio_file`` (or the content inside a tuple) has an
            unsupported type, or a file-like object yields non-bytes data.
        ValueError: If a tuple with fewer than two items is passed.
    """
    import os  # local import so the method does not depend on a file-level import

    from_tuple = isinstance(audio_file, tuple)
    if from_tuple:
        # Only the content slot matters here; the filename is not needed.
        _, payload = audio_file[:2]
    else:
        payload = audio_file

    # File path: read the whole file in binary mode.
    if isinstance(payload, (str, os.PathLike)):
        with open(payload, "rb") as f:
            return f.read()

    # Already binary data; bytes(...) is a no-op for an exact `bytes` object.
    if isinstance(payload, (bytes, bytearray, memoryview)):
        return bytes(payload)

    # Any file-like object (BufferedReader, BytesIO, SpooledTemporaryFile, ...).
    reader = getattr(payload, "read", None)
    if callable(reader):
        content = reader()
        if isinstance(content, (bytes, bytearray)):
            return bytes(content)
        # A text-mode stream would hand back `str`, which cannot be sent as-is.
        raise TypeError(
            f"File-like audio_file returned {type(content)} from read(). Expected bytes."
        )

    if from_tuple:
        raise TypeError(
            f"Unexpected type in tuple: {type(payload)}. Expected str, bytes, PathLike or a binary file-like object."
        )
    raise TypeError(f"Unsupported type for audio_file: {type(audio_file)}")
|
||||
|
||||
def audio_transcriptions(
|
||||
self,
|
||||
model: str,
|
||||
audio_file: FileTypes,
|
||||
optional_params: dict,
|
||||
litellm_params: dict,
|
||||
model_response: TranscriptionResponse,
|
||||
timeout: float,
|
||||
max_retries: int,
|
||||
|
@ -910,11 +870,8 @@ class BaseLLMHTTPHandler:
|
|||
client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
|
||||
atranscription: bool = False,
|
||||
headers: dict = {},
|
||||
litellm_params: dict = {},
|
||||
provider_config: Optional[BaseAudioTranscriptionConfig] = None,
|
||||
) -> TranscriptionResponse:
|
||||
provider_config = ProviderConfigManager.get_provider_audio_transcription_config(
|
||||
model=model, provider=litellm.LlmProviders(custom_llm_provider)
|
||||
)
|
||||
if provider_config is None:
|
||||
raise ValueError(
|
||||
f"No provider config found for model: {model} and provider: {custom_llm_provider}"
|
||||
|
@ -938,7 +895,18 @@ class BaseLLMHTTPHandler:
|
|||
)
|
||||
|
||||
# Handle the audio file based on type
|
||||
binary_data = self.handle_audio_file(audio_file)
|
||||
data = provider_config.transform_audio_transcription_request(
|
||||
model=model,
|
||||
audio_file=audio_file,
|
||||
optional_params=optional_params,
|
||||
litellm_params=litellm_params,
|
||||
)
|
||||
binary_data: Optional[bytes] = None
|
||||
json_data: Optional[dict] = None
|
||||
if isinstance(data, bytes):
|
||||
binary_data = data
|
||||
else:
|
||||
json_data = data
|
||||
|
||||
try:
|
||||
# Make the POST request
|
||||
|
@ -946,6 +914,7 @@ class BaseLLMHTTPHandler:
|
|||
url=complete_url,
|
||||
headers=headers,
|
||||
content=binary_data,
|
||||
json=json_data,
|
||||
timeout=timeout,
|
||||
)
|
||||
except Exception as e:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue