mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 18:54:30 +00:00
Litellm dev 12 28 2024 p3 (#7464)
* feat(deepgram/): initial e2e support for deepgram stt Uses deepgram's `/listen` endpoint to transcribe speech to text Closes https://github.com/BerriAI/litellm/issues/4875 * fix: fix linting errors * test: fix test
This commit is contained in:
parent
480d838790
commit
ebc28b1921
10 changed files with 303 additions and 5 deletions
|
@ -1,3 +1,4 @@
|
|||
import io
|
||||
import json
|
||||
from typing import TYPE_CHECKING, Any, Optional, Tuple, Union
|
||||
|
||||
|
@ -17,7 +18,7 @@ from litellm.llms.custom_httpx.http_handler import (
|
|||
get_async_httpx_client,
|
||||
)
|
||||
from litellm.types.rerank import OptionalRerankParams, RerankResponse
|
||||
from litellm.types.utils import EmbeddingResponse
|
||||
from litellm.types.utils import EmbeddingResponse, FileTypes, TranscriptionResponse
|
||||
from litellm.utils import CustomStreamWrapper, ModelResponse, ProviderConfigManager
|
||||
|
||||
if TYPE_CHECKING:
|
||||
|
@ -667,6 +668,115 @@ class BaseLLMHTTPHandler:
|
|||
request_data=request_data,
|
||||
)
|
||||
|
||||
def handle_audio_file(self, audio_file: FileTypes) -> bytes:
|
||||
"""
|
||||
Processes the audio file input based on its type and returns the binary data.
|
||||
|
||||
Args:
|
||||
audio_file: Can be a file path (str), a tuple (filename, file_content), or binary data (bytes).
|
||||
|
||||
Returns:
|
||||
The binary data of the audio file.
|
||||
"""
|
||||
binary_data: bytes # Explicitly declare the type
|
||||
|
||||
# Handle the audio file based on type
|
||||
if isinstance(audio_file, str):
|
||||
# If it's a file path
|
||||
with open(audio_file, "rb") as f:
|
||||
binary_data = f.read() # `f.read()` always returns `bytes`
|
||||
elif isinstance(audio_file, tuple):
|
||||
# Handle tuple case
|
||||
_, file_content = audio_file[:2]
|
||||
if isinstance(file_content, str):
|
||||
with open(file_content, "rb") as f:
|
||||
binary_data = f.read() # `f.read()` always returns `bytes`
|
||||
elif isinstance(file_content, bytes):
|
||||
binary_data = file_content
|
||||
else:
|
||||
raise TypeError(
|
||||
f"Unexpected type in tuple: {type(file_content)}. Expected str or bytes."
|
||||
)
|
||||
elif isinstance(audio_file, bytes):
|
||||
# Assume it's already binary data
|
||||
binary_data = audio_file
|
||||
elif isinstance(audio_file, io.BufferedReader):
|
||||
# Handle file-like objects
|
||||
binary_data = audio_file.read()
|
||||
|
||||
else:
|
||||
raise TypeError(f"Unsupported type for audio_file: {type(audio_file)}")
|
||||
|
||||
return binary_data
|
||||
|
||||
def audio_transcriptions(
|
||||
self,
|
||||
model: str,
|
||||
audio_file: FileTypes,
|
||||
optional_params: dict,
|
||||
model_response: TranscriptionResponse,
|
||||
timeout: float,
|
||||
max_retries: int,
|
||||
logging_obj: LiteLLMLoggingObj,
|
||||
api_key: Optional[str],
|
||||
api_base: Optional[str],
|
||||
custom_llm_provider: str,
|
||||
client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
|
||||
atranscription: bool = False,
|
||||
headers: dict = {},
|
||||
) -> TranscriptionResponse:
|
||||
provider_config = ProviderConfigManager.get_provider_audio_transcription_config(
|
||||
model=model, provider=litellm.LlmProviders(custom_llm_provider)
|
||||
)
|
||||
if provider_config is None:
|
||||
raise ValueError(
|
||||
f"No provider config found for model: {model} and provider: {custom_llm_provider}"
|
||||
)
|
||||
headers = provider_config.validate_environment(
|
||||
api_key=api_key,
|
||||
headers=headers,
|
||||
model=model,
|
||||
messages=[],
|
||||
optional_params=optional_params,
|
||||
)
|
||||
|
||||
if client is None or not isinstance(client, HTTPHandler):
|
||||
client = _get_httpx_client()
|
||||
|
||||
complete_url = provider_config.get_complete_url(
|
||||
api_base=api_base,
|
||||
model=model,
|
||||
optional_params=optional_params,
|
||||
)
|
||||
|
||||
# Handle the audio file based on type
|
||||
binary_data = self.handle_audio_file(audio_file)
|
||||
|
||||
try:
|
||||
# Make the POST request
|
||||
response = client.post(
|
||||
url=complete_url,
|
||||
headers=headers,
|
||||
content=binary_data,
|
||||
timeout=timeout,
|
||||
)
|
||||
except Exception as e:
|
||||
raise self._handle_error(e=e, provider_config=provider_config)
|
||||
|
||||
if isinstance(provider_config, litellm.DeepgramAudioTranscriptionConfig):
|
||||
returned_response = provider_config.transform_audio_transcription_response(
|
||||
model=model,
|
||||
raw_response=response,
|
||||
model_response=model_response,
|
||||
logging_obj=logging_obj,
|
||||
request_data={},
|
||||
optional_params=optional_params,
|
||||
litellm_params={},
|
||||
api_key=api_key,
|
||||
)
|
||||
return returned_response
|
||||
return model_response
|
||||
|
||||
def _handle_error(
|
||||
self, e: Exception, provider_config: Union[BaseConfig, BaseRerankConfig]
|
||||
):
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue