fixes to streaming

Krrish Dholakia 2023-09-15 17:23:10 -07:00
parent 7b19d62564
commit 5b38f04524
5 changed files with 14 additions and 10 deletions

dist/litellm-0.1.651.tar.gz (vendored, new binary file, not shown)

@@ -123,7 +123,7 @@ def test_openai_chat_completion_call():
                 # raise Exception("invalid role")
             if "content" in chunk["choices"][0]["delta"]:
                 complete_response += chunk["choices"][0]["delta"]["content"]
-            print(f'complete_chunk: {complete_response}')
+            # print(f'complete_chunk: {complete_response}')
         if complete_response.strip() == "":
             raise Exception("Empty response received")
     except:

@@ -2453,6 +2453,8 @@ class CustomStreamWrapper:
     def __next__(self):
         try:
+            # return this for all models
+            model_response = ModelResponse(stream=True)
             completion_obj = {"content": ""} # default to role being assistant
             if self.model in litellm.anthropic_models:
                 chunk = next(self.completion_stream)
@@ -2497,21 +2499,23 @@
                 completion_obj["content"] = self.handle_cohere_chunk(chunk)
             else: # openai chat/azure models
                 chunk = next(self.completion_stream)
-                completion_obj = chunk["choices"][0]["delta"]
+                model_response = chunk
+                # LOGGING
+                threading.Thread(target=self.logging_obj.success_handler, args=(completion_obj,)).start()
+                return model_response
             # LOGGING
             threading.Thread(target=self.logging_obj.success_handler, args=(completion_obj,)).start()
-            # return this for all models
-            model_response = ModelResponse(stream=True)
-            model_response.choices[0].delta = {
-                "content": completion_obj["content"],
-            }
+            if model_response.choices[0].delta.content == "<special_litellm_token>":
+                model_response.choices[0].delta = {
+                    "content": completion_obj["content"],
+                }
+            if "role" in completion_obj:
+                model_response.choices[0].delta = completion_obj
             return model_response
         except StopIteration:
             raise StopIteration
         except Exception as e:
+            print(e)
             model_response = ModelResponse(stream=True)
             model_response.choices[0].finish_reason = "stop"
             return model_response
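For context, a minimal consumption sketch (not part of this commit): after this change, OpenAI/Azure chunks are yielded as-is, while other providers' text is carried in the ModelResponse(stream=True) created at the top of __next__. The model name and prompt below are assumptions for illustration, mirroring the access pattern used in the test hunk above.

import litellm

# Hypothetical usage: stream a completion and accumulate per-chunk delta content.
response = litellm.completion(
    model="gpt-3.5-turbo",  # assumed model, not taken from this commit
    messages=[{"role": "user", "content": "Hey, how's it going?"}],
    stream=True,
)

complete_response = ""
for chunk in response:
    # Each chunk exposes the streamed text under choices[0].delta, whether it is
    # a raw OpenAI chunk or a litellm ModelResponse(stream=True).
    delta = chunk["choices"][0]["delta"]
    if "content" in delta:
        complete_response += delta["content"]

if complete_response.strip() == "":
    raise Exception("Empty response received")
print(complete_response)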

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.650"
+version = "0.1.651"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"