further fixes to Together AI streaming

Krrish Dholakia 2023-08-26 19:30:54 -07:00
parent bb3c2fefdb
commit 60873abb41
6 changed files with 39 additions and 21 deletions


@@ -98,6 +98,7 @@ def completion(
     if fallbacks != []:
         return completion_with_fallbacks(**args)
     if litellm.model_alias_map and model in litellm.model_alias_map:
+        args["model_alias_map"] = litellm.model_alias_map
         model = litellm.model_alias_map[model] # update the model to the actual value if an alias has been passed in
     model_response = ModelResponse()
     if azure: # this flag is deprecated, remove once notebooks are also updated.
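
For context, `litellm.model_alias_map` lets callers pass a short alias as `model`, which is rewritten to the real model name before the provider call; the added line also stores the map in `args`, plausibly so internal re-invocations of `completion` retain it. A minimal usage sketch (the alias name here is made up):

import litellm
from litellm import completion

# hypothetical alias -> actual model
litellm.model_alias_map = {"my-alias": "gpt-3.5-turbo"}

# "my-alias" is resolved to "gpt-3.5-turbo" before the request is sent
response = completion(model="my-alias", messages=[{"role": "user", "content": "hi"}])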


@@ -62,22 +62,22 @@ messages = [{"content": user_message, "role": "user"}]
 # test on anthropic completion call
-# try:
-#     response = completion(
-#         model="claude-instant-1", messages=messages, stream=True, logger_fn=logger_fn
-#     )
-#     complete_response = ""
-#     start_time = time.time()
-#     for chunk in response:
-#         chunk_time = time.time()
-#         print(f"time since initial request: {chunk_time - start_time:.5f}")
-#         print(chunk["choices"][0]["delta"])
-#         complete_response += chunk["choices"][0]["delta"]["content"]
-#     if complete_response == "":
-#         raise Exception("Empty response received")
-# except:
-#     print(f"error occurred: {traceback.format_exc()}")
-#     pass
+try:
+    response = completion(
+        model="claude-instant-1", messages=messages, stream=True, logger_fn=logger_fn
+    )
+    complete_response = ""
+    start_time = time.time()
+    for chunk in response:
+        chunk_time = time.time()
+        print(f"time since initial request: {chunk_time - start_time:.5f}")
+        print(chunk["choices"][0]["delta"])
+        complete_response += chunk["choices"][0]["delta"]["content"]
+    if complete_response == "":
+        raise Exception("Empty response received")
+except:
+    print(f"error occurred: {traceback.format_exc()}")
+    pass
 # # test on huggingface completion call
@@ -98,7 +98,7 @@ messages = [{"content": user_message, "role": "user"}]
 #     print(f"error occurred: {traceback.format_exc()}")
 #     pass
-# test on together ai completion call
+# test on together ai completion call - replit-code-3b
 try:
     start_time = time.time()
     response = completion(
@@ -117,6 +117,25 @@ except:
     print(f"error occurred: {traceback.format_exc()}")
     pass
+# test on together ai completion call - starcoder
+try:
+    start_time = time.time()
+    response = completion(
+        model="together_ai/bigcode/starcoder", messages=messages, logger_fn=logger_fn, stream=True
+    )
+    complete_response = ""
+    print(f"returned response object: {response}")
+    for chunk in response:
+        chunk_time = time.time()
+        complete_response += chunk["choices"][0]["delta"]["content"] if len(chunk["choices"][0]["delta"].keys()) > 0 else ""
+        if len(complete_response) > 0:
+            print(complete_response)
+    if complete_response == "":
+        raise Exception("Empty response received")
+except:
+    print(f"error occurred: {traceback.format_exc()}")
+    pass
 # # test on azure completion call
 # try:
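
The new starcoder test accumulates streamed deltas defensively because a chunk's `delta` can arrive empty (for example, a chunk carrying no text yet). A shorter equivalent of that guard, as a sketch (`response` is assumed to be the streamed object from the test above):

complete_response = ""
for chunk in response:
    # dict.get avoids a KeyError when a chunk's delta has no "content" yet
    complete_response += chunk["choices"][0]["delta"].get("content", "") or ""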


@@ -1468,8 +1468,6 @@ class CustomStreamWrapper:
         if model in litellm.cohere_models:
             # cohere does not return an iterator, so we need to wrap it in one
             self.completion_stream = iter(completion_stream)
-        elif custom_llm_provider == "together_ai":
-            self.completion_stream = iter(completion_stream)
-        else:
+        else:
             self.completion_stream = completion_stream
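
Background on the removed branch: a Together AI completion stream is already an iterator, so wrapping it in `iter()` was a no-op; per the comment in the diff, only Cohere's response needs the wrap. A tiny standalone illustration (the fake stream is made up):

def fake_together_stream():
    yield b'data: {"choices": [{"text": "hello"}]}'

stream = fake_together_stream()
# generators are their own iterators, so iter() returns the same object
assert iter(stream) is stream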
@@ -1512,7 +1510,7 @@
         elif self.model == "replicate":
             chunk = next(self.completion_stream)
             completion_obj["content"] = chunk
-        elif (self.model == "together_ai") or ("togethercomputer"
+        elif (self.custom_llm_provider and self.custom_llm_provider == "together_ai") or ("togethercomputer"
               in self.model):
             chunk = next(self.completion_stream)
             text_data = self.handle_together_ai_chunk(chunk)
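
End-to-end, the routing change means a streamed Together AI call is now selected by its provider prefix rather than by the literal model string. Consuming such a stream, following this commit's own test (the model name is taken from the diff):

from litellm import completion

response = completion(
    model="together_ai/bigcode/starcoder",
    messages=[{"role": "user", "content": "hi"}],
    stream=True,
)
for chunk in response:
    delta = chunk["choices"][0]["delta"]
    if "content" in delta:
        print(delta["content"], end="")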


@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.490"
+version = "0.1.491"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"