forked from phoenix/litellm-mirror

commit 66a3c59ebe ("bump pyproject version")
parent e4fbc8d908
5 changed files with 86 additions and 81 deletions

Binary file not shown.
Binary file not shown.
@@ -214,6 +214,31 @@ def test_completion_cohere_stream():
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
 
+def test_completion_claude_stream():
+    try:
+        messages = [
+            {"role": "system", "content": "You are a helpful assistant."},
+            {
+                "role": "user",
+                "content": "how does a court case get to the Supreme Court?",
+            },
+        ]
+        response = completion(
+            model="claude-instant-1", messages=messages, stream=True, max_tokens=50
+        )
+        complete_response = ""
+        # Add any assertions here to check the response
+        for idx, chunk in enumerate(response):
+            chunk, finished = streaming_format_tests(idx, chunk)
+            if finished:
+                break
+            complete_response += chunk
+        if complete_response.strip() == "":
+            raise Exception("Empty response received")
+        print(f"completion_response: {complete_response}")
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+# test_completion_claude_stream()
 
 def test_completion_bedrock_ai21_stream():
     try:
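The new test leans on a streaming_format_tests helper defined elsewhere in the test module; its body is not part of this diff. A minimal sketch of the contract the test appears to assume, written against OpenAI-style streaming deltas (the field checks below are illustrative, not the repo's actual helper):

# Hypothetical sketch -- not the helper from the repo, just the contract the
# test assumes: validate one streamed chunk, return its text and whether the
# stream has finished.
def streaming_format_tests(idx, chunk):
    finished = False
    extracted_chunk = ""
    # every chunk is expected to carry a non-empty choices list with a delta dict
    assert "choices" in chunk and len(chunk["choices"]) > 0
    delta = chunk["choices"][0].get("delta", {})
    if idx == 0:
        # the first chunk is expected to announce the assistant role
        assert delta.get("role") == "assistant"
    if delta.get("content") is not None:
        extracted_chunk = delta["content"]
    if chunk["choices"][0].get("finish_reason") is not None:
        finished = True
    return extracted_chunk, finished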
@@ -327,28 +352,6 @@ def test_together_ai_completion_call_starcoder():
         print(f"error occurred: {traceback.format_exc()}")
         pass
 
-
-def test_completion_nlp_cloud_streaming():
-    try:
-        messages = [
-            {"role": "system", "content": "You are a helpful assistant."},
-            {
-                "role": "user",
-                "content": "how does a court case get to the Supreme Court?",
-            },
-        ]
-        response = completion(model="dolphin", messages=messages, stream=True, logger_fn=logger_fn)
-        complete_response = ""
-        # Add any assertions here to check the response
-        for idx, chunk in enumerate(response):
-            chunk, finished = streaming_format_tests(idx, chunk)
-            if finished:
-                break
-            complete_response += chunk
-        if complete_response == "":
-            raise Exception("Empty response received")
-    except Exception as e:
-        pytest.fail(f"Error occurred: {e}")
 #### Test Function calling + streaming ####
 
 def test_completion_openai_with_functions():
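Only the header of test_completion_openai_with_functions shows up here as context; its body is outside this hunk. For orientation, a sketch of how function calling plus streaming is typically exercised against the OpenAI chat format -- the function schema, model choice, and assertions below are assumptions for illustration, not the test from the repo:

# Illustrative sketch only -- assumes litellm.completion forwards an OpenAI-style
# `functions` list and streams back OpenAI-format chunks.
import pytest
from litellm import completion

def test_completion_openai_with_functions_sketch():
    functions = [
        {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {"location": {"type": "string"}},
                "required": ["location"],
            },
        }
    ]
    try:
        response = completion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "What's the weather in Boston?"}],
            functions=functions,
            stream=True,
        )
        for chunk in response:
            # each streamed chunk should still follow the chat-completion shape
            print(chunk["choices"][0]["delta"])
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")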
litellm/utils.py (118 changed lines)

@@ -2506,69 +2506,71 @@ class CustomStreamWrapper:
             return chunk_data['outputText']
         return ""
 
+    ## needs to handle the empty string case (even starting chunk can be an empty string)
     def __next__(self):
         model_response = ModelResponse(stream=True, model=self.model)
         try:
-            # return this for all models
-            completion_obj = {"content": ""}
-            if self.sent_first_chunk == False:
-                completion_obj["role"] = "assistant"
-                self.sent_first_chunk = True
-            if self.custom_llm_provider and self.custom_llm_provider == "anthropic":
-                chunk = next(self.completion_stream)
-                completion_obj["content"] = self.handle_anthropic_chunk(chunk)
-            elif self.model == "replicate" or self.custom_llm_provider == "replicate":
-                chunk = next(self.completion_stream)
-                completion_obj["content"] = chunk
-            elif (
-                self.custom_llm_provider and self.custom_llm_provider == "together_ai"):
-                chunk = next(self.completion_stream)
-                text_data = self.handle_together_ai_chunk(chunk)
-                if text_data == "":
-                    return self.__next__()
-                completion_obj["content"] = text_data
-            elif self.custom_llm_provider and self.custom_llm_provider == "huggingface":
-                chunk = next(self.completion_stream)
-                completion_obj["content"] = self.handle_huggingface_chunk(chunk)
-            elif self.custom_llm_provider and self.custom_llm_provider == "baseten": # baseten doesn't provide streaming
-                chunk = next(self.completion_stream)
-                completion_obj["content"] = self.handle_baseten_chunk(chunk)
-            elif self.custom_llm_provider and self.custom_llm_provider == "ai21": #ai21 doesn't provide streaming
-                chunk = next(self.completion_stream)
-                completion_obj["content"] = self.handle_ai21_chunk(chunk)
-            elif self.custom_llm_provider and self.custom_llm_provider == "vllm":
-                chunk = next(self.completion_stream)
-                completion_obj["content"] = chunk[0].outputs[0].text
-            elif self.custom_llm_provider and self.custom_llm_provider == "aleph-alpha": #aleph alpha doesn't provide streaming
-                chunk = next(self.completion_stream)
-                completion_obj["content"] = self.handle_aleph_alpha_chunk(chunk)
-            elif self.custom_llm_provider and self.custom_llm_provider == "text-completion-openai":
-                chunk = next(self.completion_stream)
-                completion_obj["content"] = self.handle_openai_text_completion_chunk(chunk)
-            elif self.model in litellm.nlp_cloud_models or self.custom_llm_provider == "nlp_cloud":
-                chunk = next(self.completion_stream)
-                completion_obj["content"] = self.handle_nlp_cloud_chunk(chunk)
-            elif self.model in (litellm.vertex_chat_models + litellm.vertex_code_chat_models + litellm.vertex_text_models + litellm.vertex_code_text_models):
-                chunk = next(self.completion_stream)
-                completion_obj["content"] = str(chunk)
-            elif self.custom_llm_provider == "cohere":
-                chunk = next(self.completion_stream)
-                completion_obj["content"] = self.handle_cohere_chunk(chunk)
-            elif self.custom_llm_provider == "bedrock":
-                completion_obj["content"] = self.handle_bedrock_stream()
-            else: # openai chat/azure models
-                chunk = next(self.completion_stream)
-                model_response = chunk
-                # LOGGING
-                threading.Thread(target=self.logging_obj.success_handler, args=(completion_obj,)).start()
-                return model_response
-
-            # LOGGING
-            threading.Thread(target=self.logging_obj.success_handler, args=(completion_obj,)).start()
-            model_response.model = self.model
-            if len(completion_obj["content"]) > 0: # cannot set content of an OpenAI Object to be an empty string
-                model_response.choices[0].delta = Delta(**completion_obj)
-            return model_response
+            while True: # loop until a non-empty string is found
+                # return this for all models
+                completion_obj = {"content": ""}
+                if self.custom_llm_provider and self.custom_llm_provider == "anthropic":
+                    chunk = next(self.completion_stream)
+                    completion_obj["content"] = self.handle_anthropic_chunk(chunk)
+                elif self.model == "replicate" or self.custom_llm_provider == "replicate":
+                    chunk = next(self.completion_stream)
+                    completion_obj["content"] = chunk
+                elif (
+                    self.custom_llm_provider and self.custom_llm_provider == "together_ai"):
+                    chunk = next(self.completion_stream)
+                    text_data = self.handle_together_ai_chunk(chunk)
+                    if text_data == "":
+                        return self.__next__()
+                    completion_obj["content"] = text_data
+                elif self.custom_llm_provider and self.custom_llm_provider == "huggingface":
+                    chunk = next(self.completion_stream)
+                    completion_obj["content"] = self.handle_huggingface_chunk(chunk)
+                elif self.custom_llm_provider and self.custom_llm_provider == "baseten": # baseten doesn't provide streaming
+                    chunk = next(self.completion_stream)
+                    completion_obj["content"] = self.handle_baseten_chunk(chunk)
+                elif self.custom_llm_provider and self.custom_llm_provider == "ai21": #ai21 doesn't provide streaming
+                    chunk = next(self.completion_stream)
+                    completion_obj["content"] = self.handle_ai21_chunk(chunk)
+                elif self.custom_llm_provider and self.custom_llm_provider == "vllm":
+                    chunk = next(self.completion_stream)
+                    completion_obj["content"] = chunk[0].outputs[0].text
+                elif self.custom_llm_provider and self.custom_llm_provider == "aleph-alpha": #aleph alpha doesn't provide streaming
+                    chunk = next(self.completion_stream)
+                    completion_obj["content"] = self.handle_aleph_alpha_chunk(chunk)
+                elif self.custom_llm_provider and self.custom_llm_provider == "text-completion-openai":
+                    chunk = next(self.completion_stream)
+                    completion_obj["content"] = self.handle_openai_text_completion_chunk(chunk)
+                elif self.model in litellm.nlp_cloud_models or self.custom_llm_provider == "nlp_cloud":
+                    chunk = next(self.completion_stream)
+                    completion_obj["content"] = self.handle_nlp_cloud_chunk(chunk)
+                elif self.model in (litellm.vertex_chat_models + litellm.vertex_code_chat_models + litellm.vertex_text_models + litellm.vertex_code_text_models):
+                    chunk = next(self.completion_stream)
+                    completion_obj["content"] = str(chunk)
+                elif self.custom_llm_provider == "cohere":
+                    chunk = next(self.completion_stream)
+                    completion_obj["content"] = self.handle_cohere_chunk(chunk)
+                elif self.custom_llm_provider == "bedrock":
+                    completion_obj["content"] = self.handle_bedrock_stream()
+                else: # openai chat/azure models
+                    chunk = next(self.completion_stream)
+                    model_response = chunk
+                    # LOGGING
+                    threading.Thread(target=self.logging_obj.success_handler, args=(completion_obj,)).start()
+                    return model_response
+
+                # LOGGING
+                threading.Thread(target=self.logging_obj.success_handler, args=(completion_obj,)).start()
+                model_response.model = self.model
+                if len(completion_obj["content"]) > 0: # cannot set content of an OpenAI Object to be an empty string
+                    if self.sent_first_chunk == False:
+                        completion_obj["role"] = "assistant"
+                        self.sent_first_chunk = True
+                    model_response.choices[0].delta = Delta(**completion_obj)
+                    return model_response
         except StopIteration:
             raise StopIteration
         except Exception as e:
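The substantive change in this hunk is that the provider-specific chunk handling now sits inside a while True loop, so chunks that decode to an empty string are skipped instead of being returned as empty deltas, and the assistant role is only attached to the first chunk that actually carries content. A stripped-down sketch of that pattern, independent of litellm's classes (EmptySkippingStream and extract_text are invented names for this illustration, standing in for CustomStreamWrapper and its handle_*_chunk methods):

# Minimal illustration of the "skip empty chunks" pattern introduced above.
class EmptySkippingStream:
    def __init__(self, raw_chunks, extract_text):
        self._iter = iter(raw_chunks)
        self._extract_text = extract_text
        self._sent_first_chunk = False

    def __iter__(self):
        return self

    def __next__(self):
        while True:  # loop until a non-empty string is found
            raw = next(self._iter)  # raises StopIteration when the stream ends
            text = self._extract_text(raw)
            if len(text) == 0:
                continue  # drop empty chunks instead of yielding empty deltas
            delta = {"content": text}
            if not self._sent_first_chunk:
                delta["role"] = "assistant"  # only the first real chunk carries the role
                self._sent_first_chunk = True
            return delta

# Usage: empty strings in the raw stream never reach the consumer.
stream = EmptySkippingStream(["", "Hel", "", "lo"], extract_text=lambda c: c)
print(list(stream))  # [{'content': 'Hel', 'role': 'assistant'}, {'content': 'lo'}]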
pyproject.toml

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.685"
+version = "0.1.686"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"