forked from phoenix/litellm-mirror
fix streaming formatting for non-openai models
parent a63784d5b3 · commit e44c218c1b
6 changed files with 16 additions and 14 deletions
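The fix tags the stream wrapper with the provider it is formatting for, so streamed chunks from non-OpenAI providers come back as well-formed OpenAI-style deltas. A minimal sketch of the call pattern this affects — the model name and prompt are illustrative, not taken from the diff, and the exact chunk access pattern may vary by litellm version:

    import litellm

    # stream=True makes completion() return a CustomStreamWrapper generator
    response = litellm.completion(
        model="claude-instant-1",  # any non-OpenAI provider exercises the fixed path
        messages=[{"role": "user", "content": "Hello"}],
        stream=True,
    )
    for chunk in response:
        # each chunk is a streaming ModelResponse; its delta carries the new text
        delta = chunk.choices[0].delta
        print(getattr(delta, "content", "") or "", end="")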
@@ -1 +1 @@
-Subproject commit c765f07b74f9a8cae211584ee70bad10e1a847a9
+Subproject commit f2fe83e002a7c3ddedf4e500665644adfd31b9fc

Binary file not shown.

Binary file not shown.
@@ -163,6 +163,7 @@ def completion(
             custom_llm_provider = model.split("/", 1)[0]
             model = model.split("/", 1)[1]
         model, custom_llm_provider = get_llm_provider(model=model, custom_llm_provider=custom_llm_provider)
+        print(f"model: {model}; llm provider: {custom_llm_provider}")
         # check if user passed in any of the OpenAI optional params
         optional_params = get_optional_params(
             functions=functions,
@@ -245,7 +246,7 @@ def completion(
                 **optional_params,
             )
             if "stream" in optional_params and optional_params["stream"] == True:
-                response = CustomStreamWrapper(response, model, logging_obj=logging)
+                response = CustomStreamWrapper(response, model, custom_llm_provider="openai", logging_obj=logging)
                 return response
             ## LOGGING
             logging.post_call(
@@ -310,7 +311,7 @@ def completion(
                 raise e

            if "stream" in optional_params and optional_params["stream"] == True:
-                response = CustomStreamWrapper(response, model, logging_obj=logging)
+                response = CustomStreamWrapper(response, model, custom_llm_provider="openai", logging_obj=logging)
                 return response
             ## LOGGING
             logging.post_call(
@@ -374,7 +375,7 @@ def completion(
                 **optional_params
             )
             if "stream" in optional_params and optional_params["stream"] == True:
-                response = CustomStreamWrapper(response, model, custom_llm_provider="openai", logging_obj=logging)
+                response = CustomStreamWrapper(response, model, custom_llm_provider="text-completion-openai", logging_obj=logging)
                 return response
             ## LOGGING
             logging.post_call(
@@ -446,7 +447,7 @@ def completion(
             )
             if "stream" in optional_params and optional_params["stream"] == True:
                 # don't try to access stream object,
-                response = CustomStreamWrapper(model_response, model, logging_obj=logging)
+                response = CustomStreamWrapper(model_response, model, custom_llm_provider="anthropic", logging_obj=logging)
                 return response
             response = model_response
         elif model in litellm.nlp_cloud_models or custom_llm_provider == "nlp_cloud":
@@ -493,7 +494,7 @@ def completion(

             if "stream" in optional_params and optional_params["stream"] == True:
                 # don't try to access stream object,
-                response = CustomStreamWrapper(model_response, model, logging_obj=logging)
+                response = CustomStreamWrapper(model_response, model, custom_llm_provider="aleph-alpha", logging_obj=logging)
                 return response
             response = model_response
         elif model in litellm.openrouter_models or custom_llm_provider == "openrouter":
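Every completion() branch shown above now tags the stream wrapper with an explicit custom_llm_provider ("openai", "text-completion-openai", "anthropic", "aleph-alpha") instead of leaving the wrapper to infer the provider from the model string. A minimal sketch of the constructor shape these calls imply — the real __init__ in this repo may accept more arguments:

    class CustomStreamWrapper:
        def __init__(self, completion_stream, model, custom_llm_provider=None, logging_obj=None):
            # the provider tag, not the model name, now selects the chunk handler in __next__
            self.completion_stream = completion_stream
            self.model = model
            self.custom_llm_provider = custom_llm_provider
            self.logging_obj = logging_obj
            self.sent_first_chunk = False  # used below to emit the assistant role exactly once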
@@ -2510,11 +2510,11 @@ class CustomStreamWrapper:
         model_response = ModelResponse(stream=True, model=self.model)
         try:
             # return this for all models
+            completion_obj = {"content": ""}
             if self.sent_first_chunk == False:
-                model_response.choices[0].delta.role = "assistant"
+                completion_obj["role"] = "assistant"
                 self.sent_first_chunk = True
-            completion_obj = {"content": ""} # default to role being assistant
-            if self.model in litellm.anthropic_models:
+            if self.custom_llm_provider and self.custom_llm_provider == "anthropic":
                 chunk = next(self.completion_stream)
                 completion_obj["content"] = self.handle_anthropic_chunk(chunk)
             elif self.model == "replicate" or self.custom_llm_provider == "replicate":
@@ -2539,10 +2539,10 @@ class CustomStreamWrapper:
             elif self.custom_llm_provider and self.custom_llm_provider == "vllm":
                 chunk = next(self.completion_stream)
                 completion_obj["content"] = chunk[0].outputs[0].text
-            elif self.model in litellm.aleph_alpha_models: #aleph alpha doesn't provide streaming
+            elif self.custom_llm_provider and self.custom_llm_provider == "aleph-alpha": #aleph alpha doesn't provide streaming
                 chunk = next(self.completion_stream)
                 completion_obj["content"] = self.handle_aleph_alpha_chunk(chunk)
-            elif self.model in litellm.open_ai_text_completion_models:
+            elif self.custom_llm_provider and self.custom_llm_provider == "text-completion-openai":
                 chunk = next(self.completion_stream)
                 completion_obj["content"] = self.handle_openai_text_completion_chunk(chunk)
             elif self.model in litellm.nlp_cloud_models or self.custom_llm_provider == "nlp_cloud":
@@ -2551,7 +2551,7 @@ class CustomStreamWrapper:
             elif self.model in (litellm.vertex_chat_models + litellm.vertex_code_chat_models + litellm.vertex_text_models + litellm.vertex_code_text_models):
                 chunk = next(self.completion_stream)
                 completion_obj["content"] = str(chunk)
-            elif self.model in litellm.cohere_models or self.custom_llm_provider == "cohere":
+            elif self.custom_llm_provider == "cohere":
                 chunk = next(self.completion_stream)
                 completion_obj["content"] = self.handle_cohere_chunk(chunk)
             elif self.custom_llm_provider == "bedrock":
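Taken together, the three CustomStreamWrapper hunks above switch chunk parsing in __next__ from model-list membership to the custom_llm_provider tag. A condensed skeleton of the resulting dispatch — only branches touched by this diff are shown, the others (replicate, vllm, nlp_cloud, vertex, bedrock, ...) are elided:

    # sketch: provider-tagged dispatch inside CustomStreamWrapper.__next__
    completion_obj = {"content": ""}
    if self.sent_first_chunk == False:
        completion_obj["role"] = "assistant"  # only the first streamed chunk carries the role
        self.sent_first_chunk = True
    if self.custom_llm_provider == "anthropic":
        completion_obj["content"] = self.handle_anthropic_chunk(next(self.completion_stream))
    elif self.custom_llm_provider == "aleph-alpha":
        completion_obj["content"] = self.handle_aleph_alpha_chunk(next(self.completion_stream))
    elif self.custom_llm_provider == "text-completion-openai":
        completion_obj["content"] = self.handle_openai_text_completion_chunk(next(self.completion_stream))
    elif self.custom_llm_provider == "cohere":
        completion_obj["content"] = self.handle_cohere_chunk(next(self.completion_stream))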
@@ -2566,7 +2566,8 @@ class CustomStreamWrapper:
             # LOGGING
             threading.Thread(target=self.logging_obj.success_handler, args=(completion_obj,)).start()
             model_response.model = self.model
-            model_response.choices[0].delta["content"] = completion_obj["content"]
+            if len(completion_obj["content"]) > 0: # cannot set content of an OpenAI Object to be an empty string
+                model_response.choices[0].delta = Delta(**completion_obj)
             return model_response
         except StopIteration:
             raise StopIteration
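With that last hunk the whole completion_obj (content plus, on the first chunk, the assistant role) becomes the delta in one step, and an empty string is never written into it. A hedged illustration of the per-chunk result — the field values here are made up for the example:

    # first chunk:  delta carries {"role": "assistant", "content": "Hel"}
    # later chunks: delta carries {"content": "lo"}; empty chunks leave the delta untouched
    completion_obj = {"content": "Hel", "role": "assistant"}
    if len(completion_obj["content"]) > 0:
        # Delta(**completion_obj) expands to Delta(content="Hel", role="assistant")
        model_response.choices[0].delta = Delta(**completion_obj)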
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.683"
+version = "0.1.684"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"