litellm-mirror/tests/llm_translation/test_gpt4o_audio.py
Krish Dholakia 142b195784
Add anthropic thinking + reasoning content support (#8778)
* feat(anthropic/chat/transformation.py): add anthropic thinking param support

* feat(anthropic/chat/transformation.py): support returning thinking content for anthropic on streaming responses

* feat(anthropic/chat/transformation.py): return list of thinking blocks (include block signature)

allows usage in tool call responses

* fix(types/utils.py): extract and map reasoning_content from anthropic as content str

* test: add testing to ensure thinking_blocks are returned at the root

* fix(anthropic/chat/handler.py): return thinking blocks on streaming - include signature

* feat(factory.py): handle anthropic thinking blocks translation if in assistant response

* test: handle openai internal instability

* test: handle openai audio instability

* ci: pin anthropic dep

* test: handle openai audio instability

* fix: fix linting error

* refactor(anthropic/chat/transformation.py): refactor function to remain <50 LOC

* fix: fix linting error

* fix: fix linting error

* fix: fix linting error

* fix: fix linting error
2025-02-24 21:54:30 -08:00

131 lines
4.5 KiB
Python

import json
import os
import sys
from datetime import datetime
from unittest.mock import AsyncMock
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import httpx
import pytest
from respx import MockRouter
import litellm
from litellm import Choices, Message, ModelResponse
from litellm.types.utils import StreamingChoices, ChatCompletionAudioResponse
import base64
import requests
def check_non_streaming_response(completion):
    """Assert that a non-streaming completion carries a usable audio payload.

    Looks at ``completion.choices[0].message.audio`` and requires that it is
    present, is a ``ChatCompletionAudioResponse``, and has non-empty ``data``.
    """
    audio = completion.choices[0].message.audio

    # Presence is checked first so a missing payload is reported as such
    # rather than as a type mismatch.
    assert audio is not None, "Audio response is missing"
    assert isinstance(audio, ChatCompletionAudioResponse), "Invalid audio response type"
    assert len(audio.data) > 0, "Audio data is empty"
async def check_streaming_response(completion):
    """Consume a streamed completion and assert that audio fields were seen.

    Every chunk is printed. For each chunk whose first choice has a non-None
    ``delta.audio``, the most recent non-None ``data``, ``transcript`` and
    ``id`` values are remembered. After the stream is exhausted, each of the
    three must have been seen at least once.
    """
    latest = {"data": None, "transcript": None, "id": None}

    async for chunk in completion:
        print(chunk)
        first_choice: StreamingChoices = chunk.choices[0]
        audio_delta = first_choice.delta.audio
        if audio_delta is None:
            # Chunk without an audio delta: nothing to record.
            continue
        for field in ("data", "transcript", "id"):
            value = audio_delta.get(field)
            if value is not None:
                latest[field] = value

    # At least one chunk must have supplied each of the three audio fields.
    assert latest["data"] is not None
    assert latest["transcript"] is not None
    assert latest["id"] is not None
@pytest.mark.asyncio
# @pytest.mark.flaky(retries=3, delay=1)
@pytest.mark.parametrize("stream", [True, False])
async def test_audio_output_from_model(stream):
    """Request text+audio output from gpt-4o-audio-preview and validate it.

    Parametrized over streaming and non-streaming calls. The test is skipped
    when the call times out or fails with an "openai-internal" error; any
    other exception is re-raised so the real failure is reported.

    In the non-streaming case the decoded audio is also written to
    ``dog.wav`` in the current working directory.
    """
    # Streaming requests pcm16; the non-streaming response is saved to a .wav
    # file below, so it requests wav (same selection as
    # test_audio_input_to_model).
    audio_format = "wav" if stream is False else "pcm16"
    litellm.set_verbose = False

    try:
        completion = await litellm.acompletion(
            model="gpt-4o-audio-preview",
            modalities=["text", "audio"],
            audio={"voice": "alloy", "format": audio_format},
            messages=[{"role": "user", "content": "response in 1 word - yes or no"}],
            stream=stream,
        )
    except litellm.Timeout as e:
        print(e)
        pytest.skip("Skipping test due to timeout")
    except Exception as e:
        if "openai-internal" in str(e):
            pytest.skip("Skipping test due to openai-internal error")
        # Any other error is a genuine failure. Swallowing it would only
        # surface later as an unbound `completion` and hide the real cause.
        raise

    if stream is True:
        await check_streaming_response(completion)
    else:
        print("response= ", completion)
        check_non_streaming_response(completion)
        wav_bytes = base64.b64decode(completion.choices[0].message.audio.data)
        with open("dog.wav", "wb") as f:
            f.write(wav_bytes)
@pytest.mark.asyncio
@pytest.mark.parametrize("stream", [True, False])
async def test_audio_input_to_model(stream):
    """Send a WAV recording to gpt-4o-audio-preview and validate the audio reply.

    Downloads a sample WAV file, base64-encodes it, and passes it as an
    ``input_audio`` content part alongside a text question. Parametrized over
    streaming and non-streaming calls. The test is skipped when the model call
    times out or fails with an "openai-internal" error; any other exception
    is re-raised so the real failure is reported.

    In the non-streaming case the decoded audio is also written to
    ``dog.wav`` in the current working directory.
    """
    # Streaming requests pcm16; the non-streaming response is saved to a .wav
    # file below, so it requests wav.
    audio_format = "wav" if stream is False else "pcm16"
    litellm._turn_on_debug()

    # Fetch the audio file and convert it to a base64 encoded string.
    url = "https://openaiassets.blob.core.windows.net/$web/API/docs/audio/alloy.wav"
    # Bounded timeout so a stalled download fails the test instead of hanging
    # the whole run.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    encoded_string = base64.b64encode(response.content).decode("utf-8")

    try:
        completion = await litellm.acompletion(
            model="gpt-4o-audio-preview",
            modalities=["text", "audio"],
            audio={"voice": "alloy", "format": audio_format},
            stream=stream,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "What is in this recording?"},
                        {
                            "type": "input_audio",
                            "input_audio": {"data": encoded_string, "format": "wav"},
                        },
                    ],
                },
            ],
        )
    except litellm.Timeout as e:
        print(e)
        pytest.skip("Skipping test due to timeout")
    except Exception as e:
        if "openai-internal" in str(e):
            pytest.skip("Skipping test due to openai-internal error")
        # Any other error is a genuine failure. Swallowing it would only
        # surface later as an unbound `completion` and hide the real cause.
        raise

    if stream is True:
        await check_streaming_response(completion)
    else:
        print("response= ", completion)
        check_non_streaming_response(completion)
        wav_bytes = base64.b64decode(completion.choices[0].message.audio.data)
        with open("dog.wav", "wb") as f:
            f.write(wav_bytes)