(feat) Support audio, modalities params (#6304)
* add audio, modalities param
* add test for gpt audio models
* add get_supported_openai_params for GPT audio models
* add supported params for audio
* test_audio_output_from_model
* bump openai to openai==1.52.0
* bump openai on pyproject
* fix audio test
* fix test mock_chat_response
* handle audio for Message
* fix handling audio for OAI compatible API endpoints
* fix linting
* fix mock dbrx test
commit 13e0b3f626 (parent e35fc3203e)
15 changed files with 290 additions and 23 deletions
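Before the diff itself, a minimal usage sketch of the two new parameters. It assumes the `litellm.get_supported_openai_params` helper named in the commit message takes a model string and returns a list of param names, and that the synchronous `litellm.completion` entry point accepts the same `modalities` / `audio` arguments as the async calls in the test file below; this is an illustration, not part of the diff.

import litellm

# Check whether the GPT audio model reports "audio" / "modalities" as supported
# OpenAI params (helper name taken from the commit message; exact signature assumed).
supported = litellm.get_supported_openai_params(model="gpt-4o-audio-preview") or []
print("audio" in supported, "modalities" in supported)

# Plain synchronous call using the new params added by this commit.
response = litellm.completion(
    model="gpt-4o-audio-preview",
    modalities=["text", "audio"],
    audio={"voice": "alloy", "format": "wav"},
    messages=[{"role": "user", "content": "Say hello."}],
)
print(response.choices[0].message.audio)  # ChatCompletionAudioResponse, per the tests below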
tests/llm_translation/test_gpt4o_audio.py (normal file, 76 additions)
@@ -0,0 +1,76 @@
import json
import os
import sys
from datetime import datetime
from unittest.mock import AsyncMock

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path


import httpx
import pytest
from respx import MockRouter

import litellm
from litellm import Choices, Message, ModelResponse
import base64
import requests


@pytest.mark.asyncio
@pytest.mark.flaky(retries=3, delay=1)
async def test_audio_output_from_model():
    litellm.set_verbose = True
    completion = await litellm.acompletion(
        model="gpt-4o-audio-preview",
        modalities=["text", "audio"],
        audio={"voice": "alloy", "format": "wav"},
        messages=[{"role": "user", "content": "response in 1 word - yes or no"}],
    )

    print("response= ", completion)

    print(completion.choices[0])

    assert completion.choices[0].message.audio is not None
    assert isinstance(
        completion.choices[0].message.audio,
        litellm.types.utils.ChatCompletionAudioResponse,
    )
    assert len(completion.choices[0].message.audio.data) > 0

    wav_bytes = base64.b64decode(completion.choices[0].message.audio.data)
    with open("dog.wav", "wb") as f:
        f.write(wav_bytes)


@pytest.mark.asyncio
async def test_audio_input_to_model():
    # Fetch the audio file and convert it to a base64 encoded string
    url = "https://openaiassets.blob.core.windows.net/$web/API/docs/audio/alloy.wav"
    response = requests.get(url)
    response.raise_for_status()
    wav_data = response.content
    encoded_string = base64.b64encode(wav_data).decode("utf-8")

    completion = await litellm.acompletion(
        model="gpt-4o-audio-preview",
        modalities=["text", "audio"],
        audio={"voice": "alloy", "format": "wav"},
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "What is in this recording?"},
                    {
                        "type": "input_audio",
                        "input_audio": {"data": encoded_string, "format": "wav"},
                    },
                ],
            },
        ],
    )

    print(completion.choices[0].message)
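A hedged follow-up sketch of consuming the audio reply asserted in test_audio_output_from_model and referencing it in a second turn. The `id` and `transcript` fields, and the assistant message shape `{"role": "assistant", "audio": {"id": ...}}`, are assumed to mirror the upstream OpenAI ChatCompletionAudio conventions and are not asserted by this diff.

import base64
import litellm

async def follow_up_turn(completion):
    # `completion` is the response object from the first test above.
    audio = completion.choices[0].message.audio           # ChatCompletionAudioResponse
    with open("reply.wav", "wb") as f:
        f.write(base64.b64decode(audio.data))             # `data` is asserted non-empty above
    print(getattr(audio, "transcript", None))             # assumed field (mirrors OpenAI)

    # Upstream OpenAI convention: reference the assistant's audio by id instead of
    # resending the bytes; whether litellm forwards this unchanged is an assumption.
    return await litellm.acompletion(
        model="gpt-4o-audio-preview",
        modalities=["text", "audio"],
        audio={"voice": "alloy", "format": "wav"},
        messages=[
            {"role": "user", "content": "response in 1 word - yes or no"},
            {"role": "assistant", "audio": {"id": getattr(audio, "id", None)}},
            {"role": "user", "content": "Now explain your answer."},
        ],
    )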