(fix) litellm.text_completion raises a non-blocking error on simple usage (#6546)

* unit test test_huggingface_text_completion_logprobs * fix return TextCompletionHandler convert_chat_to_text_completion * fix hf rest api * fix test_huggingface_text_completion_logprobs * fix linting errors * fix import LiteLLMResponseObjectHandler * fix test for LiteLLMResponseObjectHandler * fix test text completion
2024-11-05 05:17:48 +05:30 · 2024-11-05 05:17:48 +05:30 · 58ce30acee
commit 58ce30acee
parent 67ddf55ebd
6 changed files with 374 additions and 111 deletions
--- a/tests/llm_translation/test_text_completion.py
+++ b/tests/llm_translation/test_text_completion.py
@ -0,0 +1,141 @@
+import json
+import os
+import sys
+from datetime import datetime
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path
+
+import litellm
+import pytest
+
+from litellm.utils import (
+    LiteLLMResponseObjectHandler,
+)
+
+
+from datetime import timedelta
+
+from litellm.types.utils import (
+    ModelResponse,
+    TextCompletionResponse,
+    TextChoices,
+    Logprobs as TextCompletionLogprobs,
+    Usage,
+)
+
+
+def test_convert_chat_to_text_completion():
+    """Convert a single-choice chat ModelResponse into a TextCompletionResponse.
+
+    Verifies that id, created, model, the choice text, the finish reason and
+    the usage block are present on the converted object, and that its
+    ``object`` field reads "text_completion".
+    """
+    single_choice = {
+        "index": 0,
+        "message": {"content": "Hello, world!"},
+        "finish_reason": "stop",
+    }
+    source_response = ModelResponse(
+        id="chat123",
+        created=1234567890,
+        model="gpt-3.5-turbo",
+        choices=[single_choice],
+        usage={"total_tokens": 10, "completion_tokens": 10},
+        _hidden_params={"api_key": "test"},
+    )
+
+    converted = LiteLLMResponseObjectHandler.convert_chat_to_text_completion(
+        response=source_response,
+        text_completion_response=TextCompletionResponse(),
+    )
+
+    # Top-level metadata
+    assert isinstance(converted, TextCompletionResponse)
+    assert converted.id == "chat123"
+    assert converted.object == "text_completion"
+    assert converted.created == 1234567890
+    assert converted.model == "gpt-3.5-turbo"
+
+    # The one and only choice
+    only_choice = converted.choices[0]
+    assert only_choice.text == "Hello, world!"
+    assert only_choice.finish_reason == "stop"
+
+    # prompt_tokens was not supplied in the input usage dict; the test expects 0
+    expected_usage = Usage(
+        completion_tokens=10,
+        prompt_tokens=0,
+        total_tokens=10,
+        completion_tokens_details=None,
+        prompt_tokens_details=None,
+    )
+    assert converted.usage == expected_usage
+
+
+def test_convert_provider_response_logprobs():
+    """Test converting Hugging Face provider logprobs to text completion logprobs.
+
+    The raw provider payload is supplied through
+    ``_hidden_params["original_response"]`` and the helper is invoked with
+    ``custom_llm_provider="huggingface"``.
+    """
+    response = ModelResponse(
+        id="test123",
+        _hidden_params={
+            "original_response": {
+                "details": {"tokens": [{"text": "hello", "logprob": -1.0}]}
+            }
+        },
+    )
+
+    result = LiteLLMResponseObjectHandler._convert_provider_response_logprobs_to_text_completion_logprobs(
+        response=response, custom_llm_provider="huggingface"
+    )
+
+    # The exact values depend on litellm.huggingface._transform_logprobs, which
+    # is not exercised in detail here. Instead of a tautology that can never
+    # fail, pin the return-type contract of the helper.
+    # NOTE(review): assumes the helper returns Optional[TextCompletionLogprobs]
+    # -- confirm against litellm.utils.
+    assert result is None or isinstance(result, TextCompletionLogprobs)
+
+
+def test_convert_provider_response_logprobs_non_huggingface():
+    """Logprobs conversion for a non-huggingface provider ("openai") yields None."""
+    handler = LiteLLMResponseObjectHandler
+    bare_response = ModelResponse(id="test123", _hidden_params={})
+
+    converted_logprobs = handler._convert_provider_response_logprobs_to_text_completion_logprobs(
+        response=bare_response, custom_llm_provider="openai"
+    )
+
+    assert converted_logprobs is None
+
+
+def test_convert_chat_to_text_completion_multiple_choices():
+    """Convert a two-choice chat ModelResponse into a TextCompletionResponse.
+
+    Both choices must appear in order, each with its own text and finish
+    reason, and the usage block must match the expected totals.
+    """
+    first_choice = {
+        "index": 0,
+        "message": {"content": "First response"},
+        "finish_reason": "stop",
+    }
+    second_choice = {
+        "index": 1,
+        "message": {"content": "Second response"},
+        "finish_reason": "length",
+    }
+    source_response = ModelResponse(
+        id="chat456",
+        created=1234567890,
+        model="gpt-3.5-turbo",
+        choices=[first_choice, second_choice],
+        usage={"total_tokens": 20},
+        _hidden_params={"api_key": "test"},
+    )
+
+    converted = LiteLLMResponseObjectHandler.convert_chat_to_text_completion(
+        response=source_response,
+        text_completion_response=TextCompletionResponse(),
+    )
+
+    assert isinstance(converted, TextCompletionResponse)
+    assert converted.id == "chat456"
+    assert converted.object == "text_completion"
+    assert len(converted.choices) == 2
+
+    # Per-choice expectations, in positional order
+    choice_a = converted.choices[0]
+    assert choice_a.text == "First response"
+    assert choice_a.finish_reason == "stop"
+    choice_b = converted.choices[1]
+    assert choice_b.text == "Second response"
+    assert choice_b.finish_reason == "length"
+
+    # Only total_tokens was supplied; the test expects the other counters to be 0
+    expected_usage = Usage(
+        completion_tokens=0,
+        prompt_tokens=0,
+        total_tokens=20,
+        completion_tokens_details=None,
+        prompt_tokens_details=None,
+    )
+    assert converted.usage == expected_usage
--- a/tests/llm_translation/test_text_completion_unit_tests.py
+++ b/tests/llm_translation/test_text_completion_unit_tests.py
@ -3,11 +3,15 @@ import os
 import sys
 from datetime import datetime
 from unittest.mock import AsyncMock
+import pytest
+import httpx
+from respx import MockRouter

 sys.path.insert(
    0, os.path.abspath("../..")
 )  # Adds the parent directory to the system path

+import litellm
 from litellm.types.utils import TextCompletionResponse


@ -62,3 +66,71 @@ def test_convert_dict_to_text_completion_response():
    assert response.choices[0].logprobs.token_logprobs == [None, -12.203847]
    assert response.choices[0].logprobs.tokens == ["hello", " crisp"]
    assert response.choices[0].logprobs.top_logprobs == [None, {",": -2.1568563}]
+
+
+@pytest.mark.asyncio
+@pytest.mark.respx
+async def test_huggingface_text_completion_logprobs(respx_mock: MockRouter, monkeypatch):
+    """Test text completion with Hugging Face, focusing on logprobs structure.
+
+    The Hugging Face inference endpoint is mocked with respx. The test checks
+    the outgoing request body, the type and model of the returned
+    TextCompletionResponse, the per-token logprobs of the first choice, and
+    that the usage counters are positive.
+    """
+    # Enable verbose logging for this test only. monkeypatch restores the
+    # previous value on teardown, so the module-level flag does not leak into
+    # tests that run afterwards.
+    monkeypatch.setattr(litellm, "set_verbose", True)
+
+    # Mock the raw response from Hugging Face
+    mock_response = [
+        {
+            "generated_text": ",\n\nI have a question...",  # truncated for brevity
+            "details": {
+                "finish_reason": "length",
+                "generated_tokens": 100,
+                "seed": None,
+                "prefill": [],
+                "tokens": [
+                    {"id": 28725, "text": ",", "logprob": -1.7626953, "special": False},
+                    {"id": 13, "text": "\n", "logprob": -1.7314453, "special": False},
+                ],
+            },
+        }
+    ]
+
+    # Mock the API request
+    mock_request = respx_mock.post(
+        "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-v0.1"
+    ).mock(return_value=httpx.Response(200, json=mock_response))
+
+    response = await litellm.atext_completion(
+        model="huggingface/mistralai/Mistral-7B-v0.1",
+        prompt="good morning",
+    )
+
+    # Verify the request
+    assert mock_request.called
+    request_body = json.loads(mock_request.calls[0].request.content)
+    assert request_body == {
+        "inputs": "good morning",
+        "parameters": {"details": True, "return_full_text": False},
+        "stream": False,
+    }
+
+    print("response=", response)
+
+    # Verify response structure
+    assert isinstance(response, TextCompletionResponse)
+    assert response.object == "text_completion"
+    assert response.model == "mistralai/Mistral-7B-v0.1"
+
+    # Verify logprobs structure
+    choice = response.choices[0]
+    assert choice.finish_reason == "length"
+    assert choice.index == 0
+    assert isinstance(choice.logprobs.tokens, list)
+    assert isinstance(choice.logprobs.token_logprobs, list)
+    assert isinstance(choice.logprobs.text_offset, list)
+    assert isinstance(choice.logprobs.top_logprobs, list)
+    # Values mirror the two tokens in the mocked "details.tokens" payload
+    assert choice.logprobs.tokens == [",", "\n"]
+    assert choice.logprobs.token_logprobs == [-1.7626953, -1.7314453]
+    assert choice.logprobs.text_offset == [0, 1]
+    assert choice.logprobs.top_logprobs == [{}, {}]
+
+    # Verify usage
+    assert response.usage["completion_tokens"] > 0
+    assert response.usage["prompt_tokens"] > 0
+    assert response.usage["total_tokens"] > 0