forked from phoenix/litellm-mirror
commit 60873abb41
parent bb3c2fefdb

    further fixes to together ai streaming

6 changed files with 39 additions and 21 deletions
Binary file not shown.
Binary file not shown.
@@ -98,6 +98,7 @@ def completion(
     if fallbacks != []:
         return completion_with_fallbacks(**args)
     if litellm.model_alias_map and model in litellm.model_alias_map:
+        args["model_alias_map"] = litellm.model_alias_map
         model = litellm.model_alias_map[model] # update the model to the actual value if an alias has been passed in
     model_response = ModelResponse()
     if azure: # this flag is deprecated, remove once notebooks are also updated.
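The added line records the active alias map in args under "model_alias_map" at the same point where the alias itself is resolved. A minimal sketch of the alias flow from the caller's side; the alias name "gpt-fast" is invented for illustration:

import litellm

litellm.model_alias_map = {"gpt-fast": "gpt-3.5-turbo"}  # "gpt-fast" is a made-up alias
# completion(model="gpt-fast", ...) now resolves to "gpt-3.5-turbo", and the
# added line above also carries the map along in args["model_alias_map"].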
@@ -62,22 +62,22 @@ messages = [{"content": user_message, "role": "user"}]


 # test on anthropic completion call
-# try:
-#     response = completion(
-#         model="claude-instant-1", messages=messages, stream=True, logger_fn=logger_fn
-#     )
-#     complete_response = ""
-#     start_time = time.time()
-#     for chunk in response:
-#         chunk_time = time.time()
-#         print(f"time since initial request: {chunk_time - start_time:.5f}")
-#         print(chunk["choices"][0]["delta"])
-#         complete_response += chunk["choices"][0]["delta"]["content"]
-#     if complete_response == "":
-#         raise Exception("Empty response received")
-# except:
-#     print(f"error occurred: {traceback.format_exc()}")
-#     pass
+try:
+    response = completion(
+        model="claude-instant-1", messages=messages, stream=True, logger_fn=logger_fn
+    )
+    complete_response = ""
+    start_time = time.time()
+    for chunk in response:
+        chunk_time = time.time()
+        print(f"time since initial request: {chunk_time - start_time:.5f}")
+        print(chunk["choices"][0]["delta"])
+        complete_response += chunk["choices"][0]["delta"]["content"]
+    if complete_response == "":
+        raise Exception("Empty response received")
+except:
+    print(f"error occurred: {traceback.format_exc()}")
+    pass


 # # test on huggingface completion call
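These tests index into each chunk as chunk["choices"][0]["delta"], relying on litellm normalizing provider streams to OpenAI-style delta objects. A small illustration of the shape the assertions above assume; the field values are invented:

# OpenAI-style streaming chunk shape assumed by the tests; values are made up.
chunk = {"choices": [{"delta": {"role": "assistant", "content": "Hello"}}]}
print(chunk["choices"][0]["delta"])  # {'role': 'assistant', 'content': 'Hello'}
complete_response = chunk["choices"][0]["delta"]["content"]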
@@ -98,7 +98,7 @@ messages = [{"content": user_message, "role": "user"}]
 #     print(f"error occurred: {traceback.format_exc()}")
 #     pass

-# test on together ai completion call
+# test on together ai completion call - replit-code-3b
 try:
     start_time = time.time()
     response = completion(
@@ -117,6 +117,25 @@ except:
     print(f"error occurred: {traceback.format_exc()}")
     pass

+# test on together ai completion call - starcoder
+try:
+    start_time = time.time()
+    response = completion(
+        model="together_ai/bigcode/starcoder", messages=messages, logger_fn=logger_fn, stream= True
+    )
+    complete_response = ""
+    print(f"returned response object: {response}")
+    for chunk in response:
+        chunk_time = time.time()
+        complete_response += chunk["choices"][0]["delta"]["content"] if len(chunk["choices"][0]["delta"].keys()) > 0 else ""
+        if len(complete_response) > 0:
+            print(complete_response)
+    if complete_response == "":
+        raise Exception("Empty response received")
+except:
+    print(f"error occurred: {traceback.format_exc()}")
+    pass
+

 # # test on azure completion call
 # try:
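The starcoder test checks len(delta.keys()) > 0 before concatenating, so empty deltas contribute nothing. A slightly more forgiving version of that accumulation, not part of the commit, using dict.get so a delta that has keys but no "content" also contributes an empty string instead of raising KeyError:

chunk = {"choices": [{"delta": {"content": "def "}}]}  # example chunk; values invented
complete_response = ""
delta = chunk["choices"][0]["delta"]
complete_response += delta.get("content", "")  # missing "content" yields "" rather than KeyError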
@@ -1468,8 +1468,6 @@ class CustomStreamWrapper:
         if model in litellm.cohere_models:
             # cohere does not return an iterator, so we need to wrap it in one
             self.completion_stream = iter(completion_stream)
-        elif custom_llm_provider == "together_ai":
-            self.completion_stream = iter(completion_stream)
-        else:
+        else:
             self.completion_stream = completion_stream
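One plausible reading of this removal: the Together AI stream handed to CustomStreamWrapper is already an iterator (the together_ai branch in the next hunk calls next() on it directly), so the extra iter() wrapper was a no-op. A generic illustration of that Python behavior:

gen = (line for line in ["data: a", "data: b"])
assert iter(gen) is gen  # iterators are their own iterators, so iter() returns the same object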
@@ -1512,7 +1510,7 @@ class CustomStreamWrapper:
             elif self.model == "replicate":
                 chunk = next(self.completion_stream)
                 completion_obj["content"] = chunk
-            elif (self.model == "together_ai") or ("togethercomputer"
+            elif (self.custom_llm_provider and self.custom_llm_provider == "together_ai") or ("togethercomputer"
                                                    in self.model):
                 chunk = next(self.completion_stream)
                 text_data = self.handle_together_ai_chunk(chunk)
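The old condition compared self.model against "together_ai", but with the "together_ai/bigcode/starcoder" calling convention used in the tests the provider prefix is presumably split off before the model name is stored, so that equality could never hold; checking self.custom_llm_provider matches how the call was actually routed. For reference, a minimal sketch of what a Together AI SSE chunk handler might look like; the real handle_together_ai_chunk lives elsewhere in utils.py, and the payload field names here are assumptions:

import json

def handle_together_ai_chunk_sketch(chunk: bytes) -> str:
    # SSE lines are assumed to look roughly like: b'data: {"choices": [{"text": "..."}]}'
    decoded = chunk.decode("utf-8")
    if decoded.startswith("data:"):
        payload = decoded[len("data:"):].strip()
        if payload and payload != "[DONE]":
            return json.loads(payload)["choices"][0]["text"]
    return ""

print(handle_together_ai_chunk_sketch(b'data: {"choices": [{"text": "hi"}]}'))  # -> hi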
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.490"
+version = "0.1.491"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"