fix(openai.py): return logprobs for text completion calls

Krrish Dholakia 2024-04-02 14:05:56 -07:00
parent 80f8645e1a
commit b07788d2a5
6 changed files with 50459 additions and 82 deletions
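
For context, a minimal usage sketch of what this fix enables, modeled on the test added in this commit; it assumes an OpenAI API key is configured and that text_completion() now returns a TextCompletionResponse whose choices carry a parsed Logprobs object:

import litellm
from litellm.utils import Logprobs

# Hypothetical call: ask an OpenAI text-completion model for token logprobs.
response = litellm.text_completion(
    model="gpt-3.5-turbo-instruct",
    prompt="Hey, how's it going?",
    max_tokens=4,
    logprobs=5,
)

# After this change, logprobs survive the text->chat conversion and come
# back as a typed Logprobs object on each choice instead of being dropped.
assert response.choices[0].logprobs is not None
assert isinstance(response.choices[0].logprobs, Logprobs)
print(response.choices[0].logprobs.tokens)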


@@ -8,6 +8,7 @@ from litellm.utils import (
     CustomStreamWrapper,
     convert_to_model_response_object,
     TranscriptionResponse,
+    TextCompletionResponse,
 )
 from typing import Callable, Optional, BinaryIO
 from litellm import OpenAIConfig
@@ -15,11 +16,11 @@ import litellm, json
 import httpx
 from .custom_httpx.azure_dall_e_2 import CustomHTTPTransport, AsyncCustomHTTPTransport
 from openai import AzureOpenAI, AsyncAzureOpenAI
-from ..llms.openai import OpenAITextCompletion
+from ..llms.openai import OpenAITextCompletion, OpenAITextCompletionConfig
 import uuid
 from .prompt_templates.factory import prompt_factory, custom_prompt

-openai_text_completion = OpenAITextCompletion()
+openai_text_completion_config = OpenAITextCompletionConfig()


 class AzureOpenAIError(Exception):
@@ -300,9 +301,11 @@ class AzureTextCompletion(BaseLLM):
                     "api_base": api_base,
                 },
             )
-            return openai_text_completion.convert_to_model_response_object(
-                response_object=stringified_response,
-                model_response_object=model_response,
+            return (
+                openai_text_completion_config.convert_to_chat_model_response_object(
+                    response_object=TextCompletionResponse(**stringified_response),
+                    model_response_object=model_response,
+                )
             )
         except AzureOpenAIError as e:
             exception_mapping_worked = True
@@ -373,7 +376,7 @@ class AzureTextCompletion(BaseLLM):
                 },
             )
             response = await azure_client.completions.create(**data, timeout=timeout)
-            return openai_text_completion.convert_to_model_response_object(
+            return openai_text_completion_config.convert_to_chat_model_response_object(
                 response_object=response.model_dump(),
                 model_response_object=model_response,
             )


@@ -10,6 +10,7 @@ from litellm.utils import (
     convert_to_model_response_object,
     Usage,
     TranscriptionResponse,
+    TextCompletionResponse,
 )
 from typing import Callable, Optional
 import aiohttp, requests
@@ -200,6 +201,43 @@ class OpenAITextCompletionConfig:
             and v is not None
         }

+    def convert_to_chat_model_response_object(
+        self,
+        response_object: Optional[TextCompletionResponse] = None,
+        model_response_object: Optional[ModelResponse] = None,
+    ):
+        try:
+            ## RESPONSE OBJECT
+            if response_object is None or model_response_object is None:
+                raise ValueError("Error in response object format")
+            choice_list = []
+            for idx, choice in enumerate(response_object["choices"]):
+                message = Message(
+                    content=choice["text"],
+                    role="assistant",
+                )
+                choice = Choices(
+                    finish_reason=choice["finish_reason"], index=idx, message=message
+                )
+                choice_list.append(choice)
+            model_response_object.choices = choice_list
+
+            if "usage" in response_object:
+                model_response_object.usage = response_object["usage"]
+
+            if "id" in response_object:
+                model_response_object.id = response_object["id"]
+
+            if "model" in response_object:
+                model_response_object.model = response_object["model"]
+
+            model_response_object._hidden_params["original_response"] = (
+                response_object  # track original response, if users make a litellm.text_completion() request, we can return the original response
+            )
+            return model_response_object
+        except Exception as e:
+            raise e
+

 class OpenAIChatCompletion(BaseLLM):
     def __init__(self) -> None:
@@ -962,40 +1000,6 @@ class OpenAITextCompletion(BaseLLM):
             headers["Authorization"] = f"Bearer {api_key}"
         return headers

-    def convert_to_model_response_object(
-        self,
-        response_object: Optional[dict] = None,
-        model_response_object: Optional[ModelResponse] = None,
-    ):
-        try:
-            ## RESPONSE OBJECT
-            if response_object is None or model_response_object is None:
-                raise ValueError("Error in response object format")
-            choice_list = []
-            for idx, choice in enumerate(response_object["choices"]):
-                message = Message(content=choice["text"], role="assistant")
-                choice = Choices(
-                    finish_reason=choice["finish_reason"], index=idx, message=message
-                )
-                choice_list.append(choice)
-            model_response_object.choices = choice_list
-
-            if "usage" in response_object:
-                model_response_object.usage = response_object["usage"]
-
-            if "id" in response_object:
-                model_response_object.id = response_object["id"]
-
-            if "model" in response_object:
-                model_response_object.model = response_object["model"]
-
-            model_response_object._hidden_params["original_response"] = (
-                response_object  # track original response, if users make a litellm.text_completion() request, we can return the original response
-            )
-            return model_response_object
-        except Exception as e:
-            raise e
-
     def completion(
         self,
         model_response: ModelResponse,
@@ -1077,6 +1081,8 @@ class OpenAITextCompletion(BaseLLM):
                     status_code=response.status_code, message=response.text
                 )

+            response_json = response.json()
+
             ## LOGGING
             logging_obj.post_call(
                 input=prompt,
@@ -1089,10 +1095,7 @@ class OpenAITextCompletion(BaseLLM):
             )

             ## RESPONSE OBJECT
-            return self.convert_to_model_response_object(
-                response_object=response.json(),
-                model_response_object=model_response,
-            )
+            return TextCompletionResponse(**response_json)
         except Exception as e:
             raise e
@@ -1108,6 +1111,7 @@ class OpenAITextCompletion(BaseLLM):
         model: str,
         timeout: float,
     ):
+
         async with httpx.AsyncClient(timeout=timeout) as client:
             try:
                 response = await client.post(
@@ -1134,9 +1138,7 @@ class OpenAITextCompletion(BaseLLM):
                 )

                 ## RESPONSE OBJECT
-                return self.convert_to_model_response_object(
-                    response_object=response_json, model_response_object=model_response
-                )
+                return TextCompletionResponse(**response_json)
             except Exception as e:
                 raise e

File diff suppressed because it is too large


@@ -520,6 +520,9 @@ def completion(
     eos_token = kwargs.get("eos_token", None)
     preset_cache_key = kwargs.get("preset_cache_key", None)
     hf_model_name = kwargs.get("hf_model_name", None)
+    ### TEXT COMPLETION CALLS ###
+    text_completion = kwargs.get("text_completion", False)
+    atext_completion = kwargs.get("atext_completion", False)
     ### ASYNC CALLS ###
     acompletion = kwargs.get("acompletion", False)
     client = kwargs.get("client", None)
@@ -561,6 +564,8 @@ def completion(
     litellm_params = [
         "metadata",
         "acompletion",
+        "atext_completion",
+        "text_completion",
         "caching",
         "mock_response",
         "api_key",
@@ -1043,8 +1048,9 @@ def completion(
                 prompt = messages[0]["content"]
             else:
                 prompt = " ".join([message["content"] for message in messages])  # type: ignore
+
             ## COMPLETION CALL
-            model_response = openai_text_completions.completion(
+            _response = openai_text_completions.completion(
                 model=model,
                 messages=messages,
                 model_response=model_response,
@@ -1059,15 +1065,25 @@ def completion(
                 timeout=timeout,
             )

+            if (
+                optional_params.get("stream", False) == False
+                and acompletion == False
+                and text_completion == False
+            ):
+                # convert to chat completion response
+                _response = litellm.OpenAITextCompletionConfig().convert_to_chat_model_response_object(
+                    response_object=_response, model_response_object=model_response
+                )
+
             if optional_params.get("stream", False) or acompletion == True:
                 ## LOGGING
                 logging.post_call(
                     input=messages,
                     api_key=api_key,
-                    original_response=model_response,
+                    original_response=_response,
                     additional_args={"headers": headers},
                 )
-            response = model_response
+            response = _response
         elif (
             "replicate" in model
             or custom_llm_provider == "replicate"
@@ -2960,6 +2976,11 @@ async def atext_completion(*args, **kwargs):
             transformed_logprobs = litellm.utils.transform_logprobs(raw_response)
         except Exception as e:
             print_verbose(f"LiteLLM non blocking exception: {e}")
+
+        ## TRANSLATE CHAT TO TEXT FORMAT ##
+        if isinstance(response, TextCompletionResponse):
+            return response
+
         text_completion_response = TextCompletionResponse()
         text_completion_response["id"] = response.get("id", None)
         text_completion_response["object"] = "text_completion"
@@ -3156,7 +3177,7 @@ def text_completion(
             concurrent.futures.as_completed(futures)
         ):
             responses[i] = future.result()
-        text_completion_response.choices = responses
+        text_completion_response.choices = responses  # type: ignore

         return text_completion_response
     # else:
@@ -3193,6 +3214,7 @@ def text_completion(
     )

     kwargs.pop("prompt", None)
+    kwargs["text_completion"] = True
     response = completion(
         model=model,
         messages=messages,
@@ -3213,6 +3235,9 @@ def text_completion(
     except Exception as e:
         print_verbose(f"LiteLLM non blocking exception: {e}")

+    if isinstance(response, TextCompletionResponse):
+        return response
+
    text_completion_response["id"] = response.get("id", None)
    text_completion_response["object"] = "text_completion"
    text_completion_response["created"] = response.get("created", None)


@@ -16,7 +16,9 @@ from litellm import (
     text_completion,
     completion_cost,
     atext_completion,
+    TextCompletionResponse,
 )
+from litellm.utils import Logprobs
 from litellm import RateLimitError

 litellm.num_retries = 3
@@ -2963,3 +2965,21 @@ async def test_async_text_completion_chat_model_stream():


 # asyncio.run(test_async_text_completion_chat_model_stream())
+
+
+@pytest.mark.asyncio
+async def test_async_text_completion_openai_logprobs():
+    response: TextCompletionResponse = await atext_completion(
+        model="gpt-3.5-turbo-instruct",
+        prompt=["Hey, how's it going?"],
+        max_tokens=1,
+        temperature=0.0,
+        n=1,
+        stop=["####"],
+        logprobs=5,
+    )
+    print(f"response: {response}")
+
+    assert response.choices[0].logprobs is not None
+    assert isinstance(response.choices[0].logprobs, Logprobs)
+
+
+# asyncio.run(test_async_text_completion_openai_logprobs())


@@ -652,6 +652,13 @@ class EmbeddingResponse(OpenAIObject):
         return self.dict()


+class Logprobs(OpenAIObject):
+    text_offset: List[int]
+    token_logprobs: List[float]
+    tokens: List[str]
+    top_logprobs: List[Dict[str, float]]
+
+
 class TextChoices(OpenAIObject):
     def __init__(self, finish_reason=None, index=0, text=None, logprobs=None, **params):
         super(TextChoices, self).__init__(**params)
@@ -664,10 +671,13 @@ class TextChoices(OpenAIObject):
             self.text = text
         else:
             self.text = None
-        if logprobs:
-            self.logprobs = []
+        if logprobs is None:
+            self.logprobs = None
         else:
-            self.logprobs = logprobs
+            if isinstance(logprobs, dict):
+                self.logprobs = Logprobs(**logprobs)
+            else:
+                self.logprobs = logprobs

     def __contains__(self, key):
         # Define custom behavior for the 'in' operator
@@ -712,6 +722,15 @@ class TextCompletionResponse(OpenAIObject):
     }
     """

+    id: str
+    object: str
+    created: int
+    model: Optional[str]
+    choices: List[TextChoices]
+    usage: Optional[Usage]
+    _response_ms: Optional[int] = None
+    _hidden_params: Optional[dict] = None
+
     def __init__(
         self,
         id=None,
@@ -721,32 +740,58 @@ class TextCompletionResponse(OpenAIObject):
         usage=None,
         stream=False,
         response_ms=None,
+        object=None,
         **params,
     ):
-        super(TextCompletionResponse, self).__init__(**params)
         if stream:
-            self.object = "text_completion.chunk"
-            self.choices = [TextChoices()]
+            object = "text_completion.chunk"
+            choices = [TextChoices()]
         else:
-            self.object = "text_completion"
-            self.choices = [TextChoices()]
+            object = "text_completion"
+
+            if choices is not None and isinstance(choices, list):
+                new_choices = []
+                for choice in choices:
+                    if isinstance(choice, TextChoices):
+                        _new_choice = choice
+                    elif isinstance(choice, dict):
+                        _new_choice = TextChoices(**choice)
+                    new_choices.append(_new_choice)
+                choices = new_choices
+            else:
+                choices = [TextChoices()]
+
+        if object is not None:
+            object = object
+
         if id is None:
-            self.id = _generate_id()
+            id = _generate_id()
         else:
-            self.id = id
+            id = id
+
         if created is None:
-            self.created = int(time.time())
+            created = int(time.time())
         else:
-            self.created = created
+            created = created
+
+        model = model
+
+        if usage:
+            usage = usage
+        else:
+            usage = Usage()
+
+        super(TextCompletionResponse, self).__init__(
+            id=id,
+            object=object,
+            created=created,
+            model=model,
+            choices=choices,
+            usage=usage,
+            **params,
+        )
+
         if response_ms:
             self._response_ms = response_ms
         else:
             self._response_ms = None
-        self.model = model
-        if usage:
-            self.usage = usage
-        else:
-            self.usage = Usage()
         self._hidden_params = (
             {}
         )  # used in case users want to access the original model response
@@ -2513,11 +2558,12 @@ def client(original_function):
                 if is_coroutine == True:
                     pass
                 else:
-                    model_response = original_response["choices"][0]["message"][
-                        "content"
-                    ]
-                    ### POST-CALL RULES ###
-                    rules_obj.post_call_rules(input=model_response, model=model)
+                    if isinstance(original_response, ModelResponse):
+                        model_response = original_response["choices"][0]["message"][
+                            "content"
+                        ]
+                        ### POST-CALL RULES ###
+                        rules_obj.post_call_rules(input=model_response, model=model)
             except Exception as e:
                 raise e
@@ -7082,7 +7128,10 @@ def exception_type(
             or custom_llm_provider in litellm.openai_compatible_providers
         ):
             # custom_llm_provider is openai, make it OpenAI
-            message = original_exception.message
+            if hasattr(original_exception, "message"):
+                message = original_exception.message
+            else:
+                message = str(original_exception)
             if message is not None and isinstance(message, str):
                 message = message.replace("OPENAI", custom_llm_provider.upper())
                 message = message.replace("openai", custom_llm_provider)
@@ -7231,10 +7280,12 @@ def exception_type(
                 else:
                     # if no status code then it is an APIConnectionError: https://github.com/openai/openai-python#handling-errors
                     raise APIConnectionError(
-                        __cause__=original_exception.__cause__,
+                        message=f"{exception_provider} - {message}",
                         llm_provider=custom_llm_provider,
                         model=model,
-                        request=original_exception.request,
+                        request=httpx.Request(
+                            method="POST", url="https://api.openai.com/v1/"
+                        ),
                     )
         elif custom_llm_provider == "anthropic":  # one of the anthropics
             if hasattr(original_exception, "message"):
@@ -8304,14 +8355,10 @@ def exception_type(
                 else:
                     # if no status code then it is an APIConnectionError: https://github.com/openai/openai-python#handling-errors
                     raise APIConnectionError(
-                        __cause__=original_exception.__cause__,
+                        message=f"{exception_provider} - {message}",
                         llm_provider="azure",
                         model=model,
-                        request=getattr(
-                            original_exception,
-                            "request",
-                            httpx.Request(method="POST", url="https://openai.com/"),
-                        ),
+                        request=httpx.Request(method="POST", url="https://openai.com/"),
                     )
             if (
                 "BadRequestError.__init__() missing 1 required positional argument: 'param'"