forked from phoenix/litellm-mirror

Merge pull request #3547 from BerriAI/litellm_support_stream_options_text_completion

[Feat] support `stream_options` on `litellm.text_completion`

Commit 5eb12e30cc

4 changed files with 59 additions and 2 deletions
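For orientation, a minimal sketch of the call pattern this PR enables, mirroring the test added further down (the model, prompt, and token limit are illustrative):

import litellm

# When stream_options asks for usage, only the final streamed chunk carries
# token counts; earlier chunks have usage=None.
response = litellm.text_completion(
    model="gpt-3.5-turbo-instruct",
    prompt="say GM - we're going to make it ",
    stream=True,
    stream_options={"include_usage": True},
    max_tokens=10,
)

chunks = list(response)
print(chunks[-1].usage)  # populated on the last chunk only
print(chunks[0].usage)   # None for all earlier chunks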
@@ -1205,6 +1205,7 @@ class OpenAITextCompletion(BaseLLM):
             model=model,
             custom_llm_provider="text-completion-openai",
             logging_obj=logging_obj,
+            stream_options=data.get("stream_options", None),
         )

         for chunk in streamwrapper:
@@ -1243,6 +1244,7 @@ class OpenAITextCompletion(BaseLLM):
             model=model,
             custom_llm_provider="text-completion-openai",
             logging_obj=logging_obj,
+            stream_options=data.get("stream_options", None),
         )

         async for transformed_chunk in streamwrapper:
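In both the sync and async streaming paths above, stream_options rides along in the request payload and is read back out with data.get("stream_options", None) when the stream wrapper is built. A standalone sketch of that hand-off (the data dict here is a simplified stand-in, not the full payload litellm assembles):

# Simplified stand-in for the request payload; the real dict has more fields.
data = {
    "model": "gpt-3.5-turbo-instruct",
    "prompt": "say GM",
    "stream": True,
    "stream_options": {"include_usage": True},  # present only if the caller set it
}

# What the stream wrapper receives: the caller's dict, or None when unset.
stream_options = data.get("stream_options", None)
print(stream_options)  # {'include_usage': True}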
@@ -3200,6 +3200,7 @@ def text_completion(
         Union[str, List[str]]
     ] = None,  # Optional: Sequences where the API will stop generating further tokens.
     stream: Optional[bool] = None,  # Optional: Whether to stream back partial progress.
+    stream_options: Optional[dict] = None,
     suffix: Optional[
         str
     ] = None,  # Optional: The suffix that comes after a completion of inserted text.
@@ -3277,6 +3278,8 @@ def text_completion(
         optional_params["stop"] = stop
     if stream is not None:
         optional_params["stream"] = stream
+    if stream_options is not None:
+        optional_params["stream_options"] = stream_options
     if suffix is not None:
         optional_params["suffix"] = suffix
     if temperature is not None:
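The new stream_options branch follows the same convention as the surrounding parameters: only keys the caller actually set are forwarded to the provider. A hedged, standalone sketch of that pattern (the helper name is invented for illustration and is not part of litellm):

from typing import List, Optional, Union

def build_optional_params(
    stop: Optional[Union[str, List[str]]] = None,
    stream: Optional[bool] = None,
    stream_options: Optional[dict] = None,
    suffix: Optional[str] = None,
) -> dict:
    # Mirror the "if value is not None" checks above: unset parameters are omitted.
    optional_params = {}
    if stop is not None:
        optional_params["stop"] = stop
    if stream is not None:
        optional_params["stream"] = stream
    if stream_options is not None:
        optional_params["stream_options"] = stream_options
    if suffix is not None:
        optional_params["suffix"] = suffix
    return optional_params

print(build_optional_params(stream=True, stream_options={"include_usage": True}))
# {'stream': True, 'stream_options': {'include_usage': True}}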
@@ -3387,7 +3390,9 @@ def text_completion(
     if kwargs.get("acompletion", False) == True:
         return response
     if stream == True or kwargs.get("stream", False) == True:
-        response = TextCompletionStreamWrapper(completion_stream=response, model=model)
+        response = TextCompletionStreamWrapper(
+            completion_stream=response, model=model, stream_options=stream_options
+        )
         return response
     transformed_logprobs = None
     # only supported for TGI models
@@ -1534,6 +1534,39 @@ def test_openai_stream_options_call():
     assert all(chunk.usage is None for chunk in chunks[:-1])


+def test_openai_stream_options_call_text_completion():
+    litellm.set_verbose = False
+    response = litellm.text_completion(
+        model="gpt-3.5-turbo-instruct",
+        prompt="say GM - we're going to make it ",
+        stream=True,
+        stream_options={"include_usage": True},
+        max_tokens=10,
+    )
+    usage = None
+    chunks = []
+    for chunk in response:
+        print("chunk: ", chunk)
+        chunks.append(chunk)
+
+    last_chunk = chunks[-1]
+    print("last chunk: ", last_chunk)
+
+    """
+    Assert that:
+    - Last Chunk includes Usage
+    - All chunks prior to last chunk have usage=None
+    """
+
+    assert last_chunk.usage is not None
+    assert last_chunk.usage.total_tokens > 0
+    assert last_chunk.usage.prompt_tokens > 0
+    assert last_chunk.usage.completion_tokens > 0
+
+    # assert all non last chunks have usage=None
+    assert all(chunk.usage is None for chunk in chunks[:-1])
+
+
 def test_openai_text_completion_call():
     try:
         litellm.set_verbose = True
@@ -10062,16 +10062,19 @@ class CustomStreamWrapper:
             text = ""
             is_finished = False
             finish_reason = None
+            usage = None
             choices = getattr(chunk, "choices", [])
             if len(choices) > 0:
                 text = choices[0].text
                 if choices[0].finish_reason is not None:
                     is_finished = True
                     finish_reason = choices[0].finish_reason
+            usage = getattr(chunk, "usage", None)
             return {
                 "text": text,
                 "is_finished": is_finished,
                 "finish_reason": finish_reason,
+                "usage": usage,
             }

         except Exception as e:
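The chunk handler now surfaces usage alongside the text fields. A self-contained sketch of the same parsing logic, run against the two chunk shapes involved (plain objects stand in for the OpenAI SDK types; the token numbers are made up):

from types import SimpleNamespace

def parse_text_completion_chunk(chunk) -> dict:
    # Same logic as the handler above, written against plain objects.
    text = ""
    is_finished = False
    finish_reason = None
    choices = getattr(chunk, "choices", [])
    if len(choices) > 0:
        text = choices[0].text
        if choices[0].finish_reason is not None:
            is_finished = True
            finish_reason = choices[0].finish_reason
    usage = getattr(chunk, "usage", None)
    return {
        "text": text,
        "is_finished": is_finished,
        "finish_reason": finish_reason,
        "usage": usage,
    }

content_chunk = SimpleNamespace(
    choices=[SimpleNamespace(text="GM", finish_reason=None)], usage=None
)
usage_chunk = SimpleNamespace(
    choices=[],  # the trailing usage-only chunk carries no choices
    usage=SimpleNamespace(prompt_tokens=9, completion_tokens=10, total_tokens=19),
)

print(parse_text_completion_chunk(content_chunk)["usage"])             # None
print(parse_text_completion_chunk(usage_chunk)["usage"].total_tokens)  # 19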
@@ -10601,6 +10604,11 @@ class CustomStreamWrapper:
                 print_verbose(f"completion obj content: {completion_obj['content']}")
                 if response_obj["is_finished"]:
                     self.received_finish_reason = response_obj["finish_reason"]
+                if (
+                    self.stream_options
+                    and self.stream_options.get("include_usage", False) == True
+                ):
+                    model_response.usage = response_obj["usage"]
             elif self.custom_llm_provider == "azure_text":
                 response_obj = self.handle_azure_text_completion_chunk(chunk)
                 completion_obj["content"] = response_obj["text"]
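The CustomStreamWrapper branch above and the TextCompletionStreamWrapper change below gate on the same condition before attaching usage. A standalone restatement of that rule (the function name is invented for illustration):

from typing import Optional

def should_attach_usage(stream_options: Optional[dict]) -> bool:
    # Usage is attached only when the caller explicitly opted in via include_usage.
    return bool(stream_options and stream_options.get("include_usage", False) == True)

print(should_attach_usage(None))                      # False
print(should_attach_usage({"include_usage": False}))  # False
print(should_attach_usage({"include_usage": True}))   # True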
@@ -11130,9 +11138,10 @@ class CustomStreamWrapper:


 class TextCompletionStreamWrapper:
-    def __init__(self, completion_stream, model):
+    def __init__(self, completion_stream, model, stream_options: Optional[dict] = None):
         self.completion_stream = completion_stream
         self.model = model
+        self.stream_options = stream_options

     def __iter__(self):
         return self
@@ -11156,6 +11165,14 @@ class TextCompletionStreamWrapper:
             text_choices["index"] = chunk["choices"][0]["index"]
             text_choices["finish_reason"] = chunk["choices"][0]["finish_reason"]
             response["choices"] = [text_choices]
+
+            # only pass usage when stream_options["include_usage"] is True
+            if (
+                self.stream_options
+                and self.stream_options.get("include_usage", False) == True
+            ):
+                response["usage"] = chunk.get("usage", None)
+
             return response
         except Exception as e:
             raise Exception(