mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 02:34:29 +00:00
* fix(internal_user_endpoints.py): cleanup unused variables on beta endpoint no team/org split on daily user endpoint * build(model_prices_and_context_window.json): gemini-2.0-flash supports audio input * feat(gemini/transformation.py): support passing audio input to gemini * test: fix test * fix(gemini/transformation.py): support audio input as a url enables passing google cloud bucket urls * fix(gemini/transformation.py): support explicitly passing format of file * fix(gemini/transformation.py): expand support for inferred file types from url * fix(sagemaker/completion/transformation.py): fix special token error when counting sagemaker tokens * test: fix import
134 lines
4.6 KiB
Python
import json
|
|
import os
|
|
import sys
|
|
from datetime import datetime
|
|
from unittest.mock import AsyncMock
|
|
|
|
sys.path.insert(
|
|
0, os.path.abspath("../..")
|
|
) # Adds the parent directory to the system path
|
|
|
|
|
|
import httpx
|
|
import pytest
|
|
from respx import MockRouter
|
|
|
|
import litellm
|
|
from litellm import Choices, Message, ModelResponse
|
|
from litellm.types.utils import StreamingChoices, ChatCompletionAudioResponse
|
|
import base64
|
|
import requests
|
|
|
|
|
|
def check_non_streaming_response(completion):
    """Validate that a non-streaming completion carries a usable audio payload."""
    audio = completion.choices[0].message.audio
    assert audio is not None, "Audio response is missing"
    assert isinstance(
        audio, ChatCompletionAudioResponse
    ), "Invalid audio response type"
    assert len(audio.data) > 0, "Audio data is empty"
|
|
|
|
|
|
async def check_streaming_response(completion):
    """Consume a streaming completion and verify that audio data, transcript,
    and id are each supplied by at least one chunk."""
    collected = {"data": None, "transcript": None, "id": None}
    async for chunk in completion:
        print(chunk)
        delta = chunk.choices[0].delta
        if delta.audio is not None:
            # Keep the most recent non-None value seen for each field.
            for field in collected:
                value = delta.audio.get(field)
                if value is not None:
                    collected[field] = value
    # At least one chunk should have set each of data, transcript, and id.
    assert collected["data"] is not None
    assert collected["transcript"] is not None
    assert collected["id"] is not None
|
|
|
|
|
|
@pytest.mark.asyncio
# @pytest.mark.flaky(retries=3, delay=1)
@pytest.mark.parametrize("stream", [True, False])
async def test_audio_output_from_model(stream):
    """Request audio output from gpt-4o-audio-preview and validate the response.

    Streaming requests use raw pcm16 audio; non-streaming responses use wav.
    Skips on provider timeouts and transient openai-internal errors.
    """
    audio_format = "pcm16"
    if stream is False:
        audio_format = "wav"
    litellm.set_verbose = False
    try:
        completion = await litellm.acompletion(
            model="gpt-4o-audio-preview",
            modalities=["text", "audio"],
            # Bug fix: pass the computed audio_format instead of hard-coding
            # "pcm16" (the variable was computed above but unused), matching
            # test_audio_input_to_model.
            audio={"voice": "alloy", "format": audio_format},
            messages=[{"role": "user", "content": "response in 1 word - yes or no"}],
            stream=stream,
        )
    except litellm.Timeout as e:
        print(e)
        pytest.skip("Skipping test due to timeout")
    except Exception as e:
        if "openai-internal" in str(e):
            pytest.skip("Skipping test due to openai-internal error")
        # Bug fix: re-raise unexpected errors. Previously execution fell
        # through and raised a NameError on the undefined `completion` below,
        # masking the real failure.
        raise e

    if stream is True:
        await check_streaming_response(completion)
    else:
        print("response= ", completion)
        check_non_streaming_response(completion)
        wav_bytes = base64.b64decode(completion.choices[0].message.audio.data)
        with open("dog.wav", "wb") as f:
            f.write(wav_bytes)
|
|
|
|
|
|
@pytest.mark.asyncio
@pytest.mark.parametrize("stream", [True, False])
@pytest.mark.parametrize("model", ["gpt-4o-audio-preview"])  # "gpt-4o-audio-preview",
async def test_audio_input_to_model(stream, model):
    """Send a wav recording as input audio and validate the model's audio reply."""
    # Streaming replies use raw pcm16; non-streaming replies use wav.
    audio_format = "wav" if stream is False else "pcm16"
    litellm._turn_on_debug()
    litellm.drop_params = True

    # Fetch the sample audio file and base64-encode it for the request payload.
    url = "https://openaiassets.blob.core.windows.net/$web/API/docs/audio/alloy.wav"
    download = requests.get(url)
    download.raise_for_status()
    encoded_audio = base64.b64encode(download.content).decode("utf-8")

    try:
        completion = await litellm.acompletion(
            model=model,
            modalities=["text", "audio"],
            audio={"voice": "alloy", "format": audio_format},
            stream=stream,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "What is in this recording?"},
                        {
                            "type": "input_audio",
                            "input_audio": {"data": encoded_audio, "format": "wav"},
                        },
                    ],
                },
            ],
        )
    except litellm.Timeout as e:
        print(e)
        pytest.skip("Skipping test due to timeout")
    except Exception as e:
        if "openai-internal" in str(e):
            pytest.skip("Skipping test due to openai-internal error")
        raise e

    if stream is True:
        await check_streaming_response(completion)
    else:
        print("response= ", completion)
        check_non_streaming_response(completion)
        wav_bytes = base64.b64decode(completion.choices[0].message.audio.data)
        with open("dog.wav", "wb") as f:
            f.write(wav_bytes)
|