forked from phoenix/litellm-mirror
improvements to streaming
parent 63f3386dd0
commit 5470bc6fd0
4 changed files with 5 additions and 8 deletions
Binary file not shown.
Binary file not shown.
@@ -172,6 +172,7 @@ def validate_last_format(chunk):
 def streaming_format_tests(idx, chunk):
     extracted_chunk = ""
     finished = False
+    print(f"chunk: {chunk}")
     if idx == 0: # ensure role assistant is set
         validate_first_format(chunk=chunk)
         role = chunk["choices"][0]["delta"]["role"]
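For context, the test iterates over streamed chunks in the OpenAI format, and the first chunk is expected to carry the assistant role in its delta. A minimal sketch of that expectation; the real validate_first_format helper lives in the test file, so the chunk dict and assertion below are illustrative assumptions, not litellm's code:

```python
# Minimal sketch of the chunk shape the test walks over. The real
# validate_first_format helper is defined elsewhere in the test file;
# this dict and check are illustrative assumptions.
first_chunk = {
    "choices": [
        {"delta": {"role": "assistant", "content": ""}, "finish_reason": None}
    ]
}

def check_first_chunk(chunk):
    # the first streamed chunk is expected to carry the assistant role
    assert chunk["choices"][0]["delta"]["role"] == "assistant"

check_first_chunk(first_chunk)
```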
@@ -112,7 +112,6 @@ class StreamingChoices(OpenAIObject):
         self.finish_reason = finish_reason
         self.index = index
         if delta:
-            print(f"delta passed in: {delta}")
             self.delta = delta
         else:
             self.delta = Delta()
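With the debug print dropped, the surviving logic is a plain fall-back: keep the caller-supplied delta, otherwise start from an empty one. A runnable sketch of the pattern, with simplified stand-ins for litellm's Delta and StreamingChoices classes (assumptions, not the real definitions):

```python
class Delta(dict):
    """Simplified stand-in for litellm's Delta."""

class StreamingChoices:
    """Simplified stand-in; the real class subclasses OpenAIObject."""
    def __init__(self, finish_reason=None, index=0, delta=None):
        self.finish_reason = finish_reason
        self.index = index
        # keep the caller-supplied delta if there is one, else start empty
        self.delta = delta if delta else Delta()

choice = StreamingChoices(delta=Delta(content="hi"))
print(choice.delta)  # {'content': 'hi'}
```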
@@ -2456,6 +2455,7 @@ class CustomStreamWrapper:
 
     def handle_openai_text_completion_chunk(self, chunk):
         try:
+            print(f"chunk: {chunk}")
             return chunk["choices"][0]["text"]
         except:
             raise ValueError(f"Unable to parse response. Original response: {chunk}")
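The handler pulls the generated text straight out of choices[0]["text"]. A sketch with a hand-written chunk dict; the payload shape is an assumption based on the OpenAI text-completion stream format, not captured output:

```python
# Hypothetical text-completion stream chunk; only the fields the handler
# touches are shown.
chunk = {"choices": [{"text": "Hello", "index": 0, "finish_reason": None}]}

try:
    text = chunk["choices"][0]["text"]
except (KeyError, IndexError):
    raise ValueError(f"Unable to parse response. Original response: {chunk}")

print(text)  # Hello
```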
@@ -2507,6 +2507,7 @@ class CustomStreamWrapper:
         model_response = ModelResponse(stream=True, model=self.model)
         try:
             # return this for all models
+            print(f"self.sent_first_chunk: {self.sent_first_chunk}")
             if self.sent_first_chunk == False:
                 model_response.choices[0].delta.role = "assistant"
                 self.sent_first_chunk = True
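The sent_first_chunk flag makes the wrapper stamp role="assistant" exactly once per stream, on the first chunk it emits. A toy version of that bookkeeping, where Wrapper is a stand-in for CustomStreamWrapper and a plain dict stands in for the response delta:

```python
class Wrapper:
    """Toy stand-in for CustomStreamWrapper's first-chunk bookkeeping."""
    def __init__(self):
        self.sent_first_chunk = False

    def stamp_role(self, delta):
        # only the first chunk of a stream carries the assistant role
        if not self.sent_first_chunk:
            delta["role"] = "assistant"
            self.sent_first_chunk = True
        return delta

w = Wrapper()
print(w.stamp_role({"content": "Hi"}))  # {'content': 'Hi', 'role': 'assistant'}
print(w.stamp_role({"content": "!"}))   # {'content': '!'}
```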
@@ -2563,18 +2564,13 @@ class CustomStreamWrapper:
 
                 # LOGGING
                 threading.Thread(target=self.logging_obj.success_handler, args=(completion_obj,)).start()
-                model_response = ModelResponse(stream=True)
-                model_response.choices[0].delta = completion_obj
                 model_response.model = self.model
+                model_response.choices[0].delta["content"] = completion_obj["content"]
-                if model_response.choices[0].delta.content == "<special_litellm_token>":
-                    model_response.choices[0].delta = {
-                        "content": completion_obj["content"],
-                    }
                 return model_response
         except StopIteration:
             raise StopIteration
         except Exception as e:
+            traceback.print_exc()
             model_response.choices[0].finish_reason = "stop"
             return model_response
 
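The substantive change in this last hunk: rather than building a second ModelResponse and overwriting its delta wholesale, the content is written into the delta of the response created earlier in the method, so fields already set on it (such as the assistant role from the first-chunk branch) survive, and the <special_litellm_token> workaround becomes unnecessary. A toy illustration of the difference, with plain dicts standing in for ModelResponse/Delta (an assumption for the sake of the example):

```python
# Plain dicts stand in for ModelResponse/Delta (illustrative assumption).
delta = {"role": "assistant"}          # set when the first chunk was stamped
completion_obj = {"content": "Hello"}

# old approach: replace the delta wholesale -> earlier fields are lost
replaced = completion_obj
assert "role" not in replaced

# new approach: write content into the existing delta -> the role survives
delta["content"] = completion_obj["content"]
assert delta == {"role": "assistant", "content": "Hello"}
```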