(fix) litellm.text_completion raises a non-blocking error on simple usage (#6546)

* unit test test_huggingface_text_completion_logprobs

* fix return of TextCompletionHandler convert_chat_to_text_completion

* fix hf rest api

* fix test_huggingface_text_completion_logprobs

* fix linting errors

* fix import of LiteLLMResponseObjectHandler

* fix test for LiteLLMResponseObjectHandler

* fix test text completion
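
For context, the "simple usage" path referenced in the commit title is a plain litellm.text_completion call, roughly like the sketch below (the model string, prompt, and max_tokens are illustrative placeholders, not values taken from this commit):

import litellm

# Minimal text-completion call; model and prompt are placeholder values.
response = litellm.text_completion(
    model="huggingface/bigcode/starcoder",
    prompt="def hello_world():",
    max_tokens=20,
)
print(response.choices[0].text)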
Ishaan Jaff 2024-11-05 05:17:48 +05:30 committed by GitHub
parent 67ddf55ebd
commit 58ce30acee
6 changed files with 374 additions and 111 deletions


@@ -15,6 +15,7 @@ import litellm
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.secret_managers.main import get_secret_str
from litellm.types.completion import ChatCompletionMessageToolCallParam
from litellm.types.utils import Logprobs as TextCompletionLogprobs
from litellm.utils import Choices, CustomStreamWrapper, Message, ModelResponse, Usage
from .base import BaseLLM
@@ -1183,3 +1184,73 @@ class Huggingface(BaseLLM):
input=input,
encoding=encoding,
)

    def _transform_logprobs(
        self, hf_response: Optional[List]
    ) -> Optional[TextCompletionLogprobs]:
        """
        Transform Hugging Face logprobs to OpenAI.Completion() format
        """
        if hf_response is None:
            return None

        # Initialize an empty list for the transformed logprobs
        _logprob: TextCompletionLogprobs = TextCompletionLogprobs(
            text_offset=[],
            token_logprobs=[],
            tokens=[],
            top_logprobs=[],
        )

        # For each Hugging Face response, transform the logprobs
        for response in hf_response:
            # Extract the relevant information from the response
            response_details = response["details"]
            top_tokens = response_details.get("top_tokens", {})

            for i, token in enumerate(response_details["prefill"]):
                # Extract the text of the token
                token_text = token["text"]

                # Extract the logprob of the token
                token_logprob = token["logprob"]

                # Add the token information to the 'token_info' list
                _logprob.tokens.append(token_text)
                _logprob.token_logprobs.append(token_logprob)

                # stub this to work with llm eval harness
                top_alt_tokens = {"": -1.0, "": -2.0, "": -3.0}  # noqa: F601
                _logprob.top_logprobs.append(top_alt_tokens)

            # For each element in the 'tokens' list, extract the relevant information
            for i, token in enumerate(response_details["tokens"]):
                # Extract the text of the token
                token_text = token["text"]

                # Extract the logprob of the token
                token_logprob = token["logprob"]

                top_alt_tokens = {}
                temp_top_logprobs = []
                if top_tokens != {}:
                    temp_top_logprobs = top_tokens[i]

                # top_alt_tokens should look like this: { "alternative_1": -1, "alternative_2": -2, "alternative_3": -3 }
                for elem in temp_top_logprobs:
                    text = elem["text"]
                    logprob = elem["logprob"]
                    top_alt_tokens[text] = logprob

                # Add the token information to the 'token_info' list
                _logprob.tokens.append(token_text)
                _logprob.token_logprobs.append(token_logprob)
                _logprob.top_logprobs.append(top_alt_tokens)

                # Add the text offset of the token
                # This is computed as the sum of the lengths of all previous tokens
                _logprob.text_offset.append(
                    sum(len(t["text"]) for t in response_details["tokens"][:i])
                )

        return _logprob
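
To illustrate the mapping, the sketch below uses a hypothetical Hugging Face text-generation response; all values are made up for illustration, and only the structure mirrors what _transform_logprobs expects:

# Hypothetical HF text-generation response carrying `details` (illustrative values only).
hf_response = [
    {
        "details": {
            "prefill": [{"text": "Hello", "logprob": -0.1}],
            "tokens": [
                {"text": " world", "logprob": -0.5},
                {"text": "!", "logprob": -0.2},
            ],
            # no "top_tokens" key -> per-token top_logprobs stay empty dicts
        }
    }
]

# transformed = Huggingface()._transform_logprobs(hf_response)
# would yield OpenAI.Completion()-style logprobs with:
#   tokens         -> ["Hello", " world", "!"]
#   token_logprobs -> [-0.1, -0.5, -0.2]
#   top_logprobs   -> [{"": -3.0}, {}, {}]   # the prefill row gets the stubbed dict (duplicate "" keys collapse)
#   text_offset    -> [0, 6]                 # offsets are only computed for "tokens" entries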