(fix) litellm.text_completion raises a non-blocking error on simple usage (#6546)
* unit test test_huggingface_text_completion_logprobs
* fix return TextCompletionHandler convert_chat_to_text_completion
* fix hf rest api
* fix test_huggingface_text_completion_logprobs
* fix linting errors
* fix import LiteLLMResponseObjectHandler
* fix test for LiteLLMResponseObjectHandler
* fix test text completion
parent 67ddf55ebd
commit 58ce30acee
6 changed files with 374 additions and 111 deletions
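For context, the "simple usage" the fix targets is a plain text-completion call like the sketch below (the model and prompt here are illustrative, not taken from the commit):

import litellm

# Hypothetical minimal call: before this fix, converting the underlying
# chat-style response back into a text completion could raise a
# non-blocking error during response handling.
response = litellm.text_completion(
    model="gpt-3.5-turbo-instruct",
    prompt="good morning",
)
print(response.choices[0].text)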
tests/llm_translation/test_text_completion.py (new file, 141 additions)
@@ -0,0 +1,141 @@
import json
import os
import sys
from datetime import datetime

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path

import litellm
import pytest

from litellm.utils import (
    LiteLLMResponseObjectHandler,
)


from datetime import timedelta

from litellm.types.utils import (
    ModelResponse,
    TextCompletionResponse,
    TextChoices,
    Logprobs as TextCompletionLogprobs,
    Usage,
)


def test_convert_chat_to_text_completion():
    """Test converting chat completion to text completion"""
    chat_response = ModelResponse(
        id="chat123",
        created=1234567890,
        model="gpt-3.5-turbo",
        choices=[
            {
                "index": 0,
                "message": {"content": "Hello, world!"},
                "finish_reason": "stop",
            }
        ],
        usage={"total_tokens": 10, "completion_tokens": 10},
        _hidden_params={"api_key": "test"},
    )

    text_completion = TextCompletionResponse()
    result = LiteLLMResponseObjectHandler.convert_chat_to_text_completion(
        response=chat_response, text_completion_response=text_completion
    )

    assert isinstance(result, TextCompletionResponse)
    assert result.id == "chat123"
    assert result.object == "text_completion"
    assert result.created == 1234567890
    assert result.model == "gpt-3.5-turbo"
    assert result.choices[0].text == "Hello, world!"
    assert result.choices[0].finish_reason == "stop"
    assert result.usage == Usage(
        completion_tokens=10,
        prompt_tokens=0,
        total_tokens=10,
        completion_tokens_details=None,
        prompt_tokens_details=None,
    )


def test_convert_provider_response_logprobs():
    """Test converting provider logprobs to text completion logprobs"""
    response = ModelResponse(
        id="test123",
        _hidden_params={
            "original_response": {
                "details": {"tokens": [{"text": "hello", "logprob": -1.0}]}
            }
        },
    )

    result = LiteLLMResponseObjectHandler._convert_provider_response_logprobs_to_text_completion_logprobs(
        response=response, custom_llm_provider="huggingface"
    )

    # Note: The actual assertion here depends on the implementation of
    # litellm.huggingface._transform_logprobs, but we can at least test the function call
    assert (
        result is not None or result is None
    )  # Will depend on the actual implementation


def test_convert_provider_response_logprobs_non_huggingface():
    """Test converting provider logprobs for non-huggingface provider"""
    response = ModelResponse(id="test123", _hidden_params={})

    result = LiteLLMResponseObjectHandler._convert_provider_response_logprobs_to_text_completion_logprobs(
        response=response, custom_llm_provider="openai"
    )

    assert result is None


def test_convert_chat_to_text_completion_multiple_choices():
    """Test converting chat completion to text completion with multiple choices"""
    chat_response = ModelResponse(
        id="chat456",
        created=1234567890,
        model="gpt-3.5-turbo",
        choices=[
            {
                "index": 0,
                "message": {"content": "First response"},
                "finish_reason": "stop",
            },
            {
                "index": 1,
                "message": {"content": "Second response"},
                "finish_reason": "length",
            },
        ],
        usage={"total_tokens": 20},
        _hidden_params={"api_key": "test"},
    )

    text_completion = TextCompletionResponse()
    result = LiteLLMResponseObjectHandler.convert_chat_to_text_completion(
        response=chat_response, text_completion_response=text_completion
    )

    assert isinstance(result, TextCompletionResponse)
    assert result.id == "chat456"
    assert result.object == "text_completion"
    assert len(result.choices) == 2
    assert result.choices[0].text == "First response"
    assert result.choices[0].finish_reason == "stop"
    assert result.choices[1].text == "Second response"
    assert result.choices[1].finish_reason == "length"
    assert result.usage == Usage(
        completion_tokens=0,
        prompt_tokens=0,
        total_tokens=20,
        completion_tokens_details=None,
        prompt_tokens_details=None,
    )
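Taken together, the conversion tests above pin down a simple contract: each chat choice's message content becomes the text-completion choice's text, finish_reason and id/created/model carry over unchanged, the object type becomes "text_completion", and missing usage fields default to 0. A rough dict-level sketch of that behavior, assuming plain dict inputs (illustrative only, not litellm's actual implementation):

def convert_chat_to_text_sketch(chat: dict) -> dict:
    # Illustrative only: mirrors the assertions in the tests above.
    usage = chat.get("usage") or {}
    return {
        "id": chat["id"],
        "object": "text_completion",  # object type is rewritten
        "created": chat["created"],
        "model": chat["model"],
        "choices": [
            {
                "index": c["index"],
                "text": c["message"]["content"],  # message content -> choice text
                "finish_reason": c["finish_reason"],  # carried over unchanged
            }
            for c in chat["choices"]
        ],
        "usage": {  # absent token counts default to 0, per the Usage assertions
            "prompt_tokens": usage.get("prompt_tokens", 0),
            "completion_tokens": usage.get("completion_tokens", 0),
            "total_tokens": usage.get("total_tokens", 0),
        },
    }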
@@ -3,11 +3,15 @@ import os
import sys
from datetime import datetime
from unittest.mock import AsyncMock
import pytest
import httpx
from respx import MockRouter

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path

import litellm
from litellm.types.utils import TextCompletionResponse


@@ -62,3 +66,71 @@ def test_convert_dict_to_text_completion_response():
    assert response.choices[0].logprobs.token_logprobs == [None, -12.203847]
    assert response.choices[0].logprobs.tokens == ["hello", " crisp"]
    assert response.choices[0].logprobs.top_logprobs == [None, {",": -2.1568563}]


@pytest.mark.asyncio
@pytest.mark.respx
async def test_huggingface_text_completion_logprobs(respx_mock: MockRouter):
    """Test text completion with Hugging Face, focusing on logprobs structure"""
    litellm.set_verbose = True

    # Mock the raw response from Hugging Face
    mock_response = [
        {
            "generated_text": ",\n\nI have a question...",  # truncated for brevity
            "details": {
                "finish_reason": "length",
                "generated_tokens": 100,
                "seed": None,
                "prefill": [],
                "tokens": [
                    {"id": 28725, "text": ",", "logprob": -1.7626953, "special": False},
                    {"id": 13, "text": "\n", "logprob": -1.7314453, "special": False},
                ],
            },
        }
    ]

    # Mock the API request
    mock_request = respx_mock.post(
        "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-v0.1"
    ).mock(return_value=httpx.Response(200, json=mock_response))

    response = await litellm.atext_completion(
        model="huggingface/mistralai/Mistral-7B-v0.1",
        prompt="good morning",
    )

    # Verify the request
    assert mock_request.called
    request_body = json.loads(mock_request.calls[0].request.content)
    assert request_body == {
        "inputs": "good morning",
        "parameters": {"details": True, "return_full_text": False},
        "stream": False,
    }

    print("response=", response)

    # Verify response structure
    assert isinstance(response, TextCompletionResponse)
    assert response.object == "text_completion"
    assert response.model == "mistralai/Mistral-7B-v0.1"

    # Verify logprobs structure
    choice = response.choices[0]
    assert choice.finish_reason == "length"
    assert choice.index == 0
    assert isinstance(choice.logprobs.tokens, list)
    assert isinstance(choice.logprobs.token_logprobs, list)
    assert isinstance(choice.logprobs.text_offset, list)
    assert isinstance(choice.logprobs.top_logprobs, list)
    assert choice.logprobs.tokens == [",", "\n"]
    assert choice.logprobs.token_logprobs == [-1.7626953, -1.7314453]
    assert choice.logprobs.text_offset == [0, 1]
    assert choice.logprobs.top_logprobs == [{}, {}]

    # Verify usage
    assert response.usage["completion_tokens"] > 0
    assert response.usage["prompt_tokens"] > 0
    assert response.usage["total_tokens"] > 0
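The logprobs assertions above imply a straightforward mapping from Hugging Face's details.tokens list onto OpenAI-style text-completion logprobs. A rough sketch of that mapping, assuming the HF token dicts shown in the mock (illustrative only, not the actual litellm.huggingface._transform_logprobs implementation):

from typing import Any, Dict, List

def transform_hf_logprobs_sketch(tokens: List[Dict[str, Any]]) -> Dict[str, list]:
    # Illustrative only: fold HF token details into parallel logprob lists.
    out = {"tokens": [], "token_logprobs": [], "text_offset": [], "top_logprobs": []}
    offset = 0
    for tok in tokens:
        out["tokens"].append(tok["text"])
        out["token_logprobs"].append(tok["logprob"])
        out["text_offset"].append(offset)  # character offset where this token starts
        out["top_logprobs"].append({})  # no per-token alternatives in HF details
        offset += len(tok["text"])
    return out

With the two mocked tokens this yields tokens [",", "\n"], token_logprobs [-1.7626953, -1.7314453], text_offset [0, 1], and top_logprobs [{}, {}], matching the assertions above. Running pytest on tests/llm_translation/test_text_completion.py should exercise the new conversion tests.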