mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 18:54:30 +00:00
* refactor: introduce new transformation config for gpt-4o-transcribe models * refactor: expose new transformation configs for audio transcription * ci: fix config yml * feat(openai/transcriptions): support provider config transformation on openai audio transcriptions allows gpt-4o and whisper audio transformation to work as expected * refactor: migrate fireworks ai + deepgram to new transform request pattern * feat(openai/): working support for gpt-4o-audio-transcribe * build(model_prices_and_context_window.json): add gpt-4o-transcribe to model cost map * build(model_prices_and_context_window.json): specify what endpoints are supported for `/audio/transcriptions` * fix(get_supported_openai_params.py): fix return * refactor(deepgram/): migrate unit test to deepgram handler * refactor: cleanup unused imports * fix(get_supported_openai_params.py): fix linting error * test: update test
222 lines
7.9 KiB
Python
222 lines
7.9 KiB
Python
from typing import Optional, Union
|
|
|
|
import httpx
|
|
from openai import AsyncOpenAI, OpenAI
|
|
from pydantic import BaseModel
|
|
|
|
import litellm
|
|
from litellm.litellm_core_utils.audio_utils.utils import get_audio_file_name
|
|
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
|
|
from litellm.llms.base_llm.audio_transcription.transformation import (
|
|
BaseAudioTranscriptionConfig,
|
|
)
|
|
from litellm.types.utils import FileTypes
|
|
from litellm.utils import (
|
|
TranscriptionResponse,
|
|
convert_to_model_response_object,
|
|
extract_duration_from_srt_or_vtt,
|
|
)
|
|
|
|
from ..openai import OpenAIChatCompletion
|
|
|
|
|
|
class OpenAIAudioTranscription(OpenAIChatCompletion):
    """Audio-transcription handler layered on ``OpenAIChatCompletion``.

    ``audio_transcriptions`` is the entry point. It builds the request payload,
    then either performs the call synchronously or hands off to
    ``async_audio_transcriptions`` when ``atranscription`` is True. The two
    ``make_*_request`` helpers wrap the actual OpenAI SDK calls.
    """

    # Audio Transcriptions
    async def make_openai_audio_transcriptions_request(
        self,
        openai_aclient: AsyncOpenAI,
        data: dict,
        timeout: Union[float, httpx.Timeout],
    ):
        """
        Send an async transcription request and return ``(headers, response)``.

        The request always goes through
        ``openai_aclient.audio.transcriptions.with_raw_response.create`` so the
        response headers are available to the caller; unlike the sync helper,
        this does not consult ``litellm.return_response_headers``.

        Args:
            openai_aclient: Async OpenAI client used to issue the request.
            data: Keyword arguments forwarded to ``create``.
            timeout: Request timeout forwarded to ``create``.

        Returns:
            Tuple of (response headers as a ``dict``, parsed response).
        """
        raw_response = (
            await openai_aclient.audio.transcriptions.with_raw_response.create(
                **data, timeout=timeout
            )
        )  # type: ignore
        headers = dict(raw_response.headers)
        response = raw_response.parse()

        return headers, response

    def make_sync_openai_audio_transcriptions_request(
        self,
        openai_client: OpenAI,
        data: dict,
        timeout: Union[float, httpx.Timeout],
    ):
        """
        Send a sync transcription request and return ``(headers, response)``.

        Helper to:
        - call openai_client.audio.transcriptions.with_raw_response.create when
          litellm.return_response_headers is True
        - call openai_client.audio.transcriptions.create by default

        Args:
            openai_client: Sync OpenAI client used to issue the request.
            data: Keyword arguments forwarded to ``create``.
            timeout: Request timeout forwarded to ``create``.

        Returns:
            Tuple of (headers, response). ``headers`` is a ``dict`` when the
            raw-response route is taken and ``None`` otherwise.
        """
        if litellm.return_response_headers is True:
            raw_response = (
                openai_client.audio.transcriptions.with_raw_response.create(
                    **data, timeout=timeout
                )
            )  # type: ignore
            headers = dict(raw_response.headers)
            response = raw_response.parse()
            return headers, response

        response = openai_client.audio.transcriptions.create(**data, timeout=timeout)  # type: ignore
        return None, response

    @staticmethod
    def _stringify_transcription_response(response) -> dict:
        """Convert an SDK transcription response into a plain ``dict``.

        Pydantic responses are dumped as-is. Anything else is wrapped as the
        ``text`` of a ``TranscriptionResponse`` and given a ``duration`` entry
        computed by ``extract_duration_from_srt_or_vtt``.
        (presumably the non-pydantic case is a plain-text body such as SRT/VTT
        - confirm against the SDK's return types)
        """
        if isinstance(response, BaseModel):
            return response.model_dump()

        duration = extract_duration_from_srt_or_vtt(response)
        stringified_response = TranscriptionResponse(text=response).model_dump()
        stringified_response["duration"] = duration
        return stringified_response

    def audio_transcriptions(
        self,
        model: str,
        audio_file: FileTypes,
        optional_params: dict,
        litellm_params: dict,
        model_response: TranscriptionResponse,
        timeout: float,
        max_retries: int,
        logging_obj: LiteLLMLoggingObj,
        api_key: Optional[str],
        api_base: Optional[str],
        client=None,
        atranscription: bool = False,
        provider_config: Optional[BaseAudioTranscriptionConfig] = None,
    ) -> TranscriptionResponse:
        """
        Handle audio transcription request

        Args:
            model: Model name placed in the request payload.
            audio_file: Audio input placed in the request payload under ``file``.
            optional_params: Extra request parameters merged into the payload.
            litellm_params: Passed to ``provider_config`` when one is supplied.
            model_response: Response object populated with the result.
            timeout: Request timeout.
            max_retries: Retry count used when obtaining the OpenAI client.
            logging_obj: Logging object notified before and after the call.
            api_key: API key used when obtaining the OpenAI client.
            api_base: API base used when obtaining the OpenAI client.
            client: Optional client passed through to ``_get_openai_client``.
            atranscription: When True, return the coroutine produced by
                ``async_audio_transcriptions`` instead of calling synchronously.
            provider_config: Optional config whose
                ``transform_audio_transcription_request`` builds the payload.

        Returns:
            The populated ``TranscriptionResponse`` (or, when ``atranscription``
            is True, the un-awaited coroutine that will produce it).

        Raises:
            ValueError: If ``provider_config`` produces ``bytes`` rather than a
                dict payload.
        """
        if provider_config is not None:
            data = provider_config.transform_audio_transcription_request(
                model=model,
                audio_file=audio_file,
                optional_params=optional_params,
                litellm_params=litellm_params,
            )

            if isinstance(data, bytes):
                raise ValueError("OpenAI transformation route requires a dict")
        else:
            data = {"model": model, "file": audio_file, **optional_params}

        if atranscription is True:
            return self.async_audio_transcriptions(  # type: ignore
                audio_file=audio_file,
                data=data,
                model_response=model_response,
                timeout=timeout,
                api_key=api_key,
                api_base=api_base,
                client=client,
                max_retries=max_retries,
                logging_obj=logging_obj,
            )

        openai_client: OpenAI = self._get_openai_client(  # type: ignore
            is_async=False,
            api_key=api_key,
            api_base=api_base,
            timeout=timeout,
            max_retries=max_retries,
            client=client,
        )

        ## LOGGING
        logging_obj.pre_call(
            input=None,
            api_key=openai_client.api_key,
            additional_args={
                "api_base": openai_client._base_url._uri_reference,
                # NOTE(review): this is the sync path, yet True is logged here;
                # looks copied from the async path - confirm before changing.
                "atranscription": True,
                "complete_input_dict": data,
            },
        )
        # Headers are not used on the sync path.
        _, response = self.make_sync_openai_audio_transcriptions_request(
            openai_client=openai_client,
            data=data,
            timeout=timeout,
        )

        # Shared with the async path so non-pydantic responses also get a
        # "duration" entry here.
        stringified_response = self._stringify_transcription_response(response)

        ## LOGGING
        logging_obj.post_call(
            input=get_audio_file_name(audio_file),
            api_key=api_key,
            additional_args={"complete_input_dict": data},
            original_response=stringified_response,
        )
        # NOTE(review): "whisper-1" is hard-coded regardless of ``model`` -
        # confirm whether downstream consumers rely on this value.
        hidden_params = {"model": "whisper-1", "custom_llm_provider": "openai"}
        final_response: TranscriptionResponse = convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, hidden_params=hidden_params, response_type="audio_transcription")  # type: ignore
        return final_response

    async def async_audio_transcriptions(
        self,
        audio_file: FileTypes,
        data: dict,
        model_response: TranscriptionResponse,
        timeout: float,
        logging_obj: LiteLLMLoggingObj,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
        client=None,
        max_retries=None,
    ):
        """
        Async counterpart of ``audio_transcriptions``.

        Obtains an async OpenAI client, sends ``data`` as the transcription
        request, records the response headers on
        ``logging_obj.model_call_details["response_headers"]`` and converts the
        result into ``model_response``.

        Args:
            audio_file: Audio input; used here only to derive the logged name.
            data: Already-built request payload.
            model_response: Response object populated with the result.
            timeout: Request timeout.
            logging_obj: Logging object notified before and after the call.
            api_key: API key used when obtaining the OpenAI client.
            api_base: API base used when obtaining the OpenAI client.
            client: Optional client passed through to ``_get_openai_client``.
            max_retries: Retry count used when obtaining the OpenAI client.

        Returns:
            The result of ``convert_to_model_response_object`` for the
            ``"audio_transcription"`` response type.

        Raises:
            Exception: Any error raised while building the client, calling the
                API or converting the response is logged via
                ``logging_obj.post_call`` and re-raised.
        """
        try:
            openai_aclient: AsyncOpenAI = self._get_openai_client(  # type: ignore
                is_async=True,
                api_key=api_key,
                api_base=api_base,
                timeout=timeout,
                max_retries=max_retries,
                client=client,
            )

            ## LOGGING
            logging_obj.pre_call(
                input=None,
                api_key=openai_aclient.api_key,
                additional_args={
                    "api_base": openai_aclient._base_url._uri_reference,
                    "atranscription": True,
                    "complete_input_dict": data,
                },
            )
            headers, response = await self.make_openai_audio_transcriptions_request(
                openai_aclient=openai_aclient,
                data=data,
                timeout=timeout,
            )
            logging_obj.model_call_details["response_headers"] = headers
            stringified_response = self._stringify_transcription_response(response)

            ## LOGGING
            logging_obj.post_call(
                input=get_audio_file_name(audio_file),
                api_key=api_key,
                additional_args={"complete_input_dict": data},
                original_response=stringified_response,
            )
            # NOTE(review): "whisper-1" is hard-coded even though ``data`` carries
            # the requested model - confirm whether downstream relies on this.
            hidden_params = {"model": "whisper-1", "custom_llm_provider": "openai"}
            return convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, hidden_params=hidden_params, response_type="audio_transcription")  # type: ignore
        except Exception as e:
            ## LOGGING
            # Log the audio file name, as the success path does. There is no
            # local called ``input`` in this scope, so ``input=input`` would
            # hand the builtin ``input`` function to the logger.
            logging_obj.post_call(
                input=get_audio_file_name(audio_file),
                api_key=api_key,
                original_response=str(e),
            )
            raise e
|