Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-27 11:43:54 +00:00)
Add OpenAI gpt-4o-transcribe support (#9517)
* refactor: introduce new transformation config for gpt-4o-transcribe models
* refactor: expose new transformation configs for audio transcription
* ci: fix config yml
* feat(openai/transcriptions): support provider config transformation on openai audio transcriptions; allows gpt-4o and whisper audio transformation to work as expected
* refactor: migrate fireworks ai + deepgram to new transform request pattern
* feat(openai/): working support for gpt-4o-audio-transcribe
* build(model_prices_and_context_window.json): add gpt-4o-transcribe to model cost map
* build(model_prices_and_context_window.json): specify what endpoints are supported for `/audio/transcriptions`
* fix(get_supported_openai_params.py): fix return
* refactor(deepgram/): migrate unit test to deepgram handler
* refactor: cleanup unused imports
* fix(get_supported_openai_params.py): fix linting error
* test: update test
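For context, the feature is exercised through litellm's existing transcription entry point. Below is a minimal usage sketch, not an excerpt from the commit: it assumes OPENAI_API_KEY is set in the environment and that a local sample.wav exists; the "gpt-4o-transcribe" model string comes from the cost-map entry this commit adds.

import litellm

# Minimal sketch: route an audio file to the new gpt-4o-transcribe model.
# Assumes OPENAI_API_KEY is set and sample.wav exists locally.
with open("sample.wav", "rb") as audio_file:
    response = litellm.transcription(
        model="gpt-4o-transcribe",
        file=audio_file,
    )
print(response.text)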
parent f2df53771c
commit d58fe5a9f9
20 changed files with 402 additions and 92 deletions
@@ -7,6 +7,9 @@ from pydantic import BaseModel
 import litellm
 from litellm.litellm_core_utils.audio_utils.utils import get_audio_file_name
 from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
+from litellm.llms.base_llm.audio_transcription.transformation import (
+    BaseAudioTranscriptionConfig,
+)
 from litellm.types.utils import FileTypes
 from litellm.utils import (
     TranscriptionResponse,
@@ -75,6 +78,7 @@ class OpenAIAudioTranscription(OpenAIChatCompletion):
         model: str,
         audio_file: FileTypes,
         optional_params: dict,
+        litellm_params: dict,
         model_response: TranscriptionResponse,
         timeout: float,
         max_retries: int,
@@ -83,16 +87,24 @@ class OpenAIAudioTranscription(OpenAIChatCompletion):
         api_base: Optional[str],
         client=None,
         atranscription: bool = False,
+        provider_config: Optional[BaseAudioTranscriptionConfig] = None,
     ) -> TranscriptionResponse:
-        data = {"model": model, "file": audio_file, **optional_params}
-
-        if "response_format" not in data or (
-            data["response_format"] == "text" or data["response_format"] == "json"
-        ):
-            data["response_format"] = (
-                "verbose_json"  # ensures 'duration' is received - used for cost calculation
-            )
+        """
+        Handle audio transcription request
+        """
+        if provider_config is not None:
+            data = provider_config.transform_audio_transcription_request(
+                model=model,
+                audio_file=audio_file,
+                optional_params=optional_params,
+                litellm_params=litellm_params,
+            )
+
+            if isinstance(data, bytes):
+                raise ValueError("OpenAI transformation route requires a dict")
+        else:
+            data = {"model": model, "file": audio_file, **optional_params}

         if atranscription is True:
             return self.async_audio_transcriptions(  # type: ignore
                 audio_file=audio_file,
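The dispatch above reveals the transformation hook's contract: transform_audio_transcription_request receives the model, audio file, and params, and returns either a dict (sent as OpenAI-style multipart form data) or raw bytes (for providers that post the audio body directly, which is why the OpenAI route rejects bytes). What follows is a hypothetical sketch inferred only from this call site; the actual BaseAudioTranscriptionConfig base class may require additional methods not shown here.

from typing import Union

from litellm.llms.base_llm.audio_transcription.transformation import (
    BaseAudioTranscriptionConfig,
)
from litellm.types.utils import FileTypes


# Hypothetical provider config, inferred from the call site in the diff above.
class MyAudioTranscriptionConfig(BaseAudioTranscriptionConfig):
    def transform_audio_transcription_request(
        self,
        model: str,
        audio_file: FileTypes,
        optional_params: dict,
        litellm_params: dict,
    ) -> Union[dict, bytes]:
        # Returning a dict keeps the request on the OpenAI-style multipart
        # path; returning bytes is rejected by the OpenAI handler (see the
        # isinstance check above) and is meant for raw-body providers.
        return {"model": model, "file": audio_file, **optional_params}

Keeping the default dict construction in the else branch preserves the pre-existing whisper behavior, while new providers (Deepgram, Fireworks AI, gpt-4o-transcribe) opt in by supplying a provider_config.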