Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-24 18:24:20 +00:00)

Commit 889679a0dd (parent f984e5f380): fix exception mapping for streaming

8 changed files with 766 additions and 100 deletions (two binary files not shown)
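At its core, this commit reworks the streaming path in litellm/utils.py: most of the CustomStreamWrapper.handle_<provider>_chunk helpers now return a small dict instead of a bare string, so __next__ can propagate the provider's finish reason and route provider errors through exception_type(). A minimal sketch of that return contract (illustrative only; the handler name below is hypothetical, modeled on the new handle_anthropic_chunk in this diff):

    # Sketch of the new chunk-handler contract (hypothetical provider).
    def handle_example_chunk(self, chunk):
        str_line = chunk.decode("utf-8")
        text, is_finished, finish_reason = "", False, None
        if str_line.startswith("data:"):
            data_json = json.loads(str_line[5:])
            text = data_json.get("completion", "")
            if data_json.get("stop_reason", None):
                is_finished = True
                finish_reason = data_json["stop_reason"]
        # __next__ uses is_finished/finish_reason to set
        # model_response.choices[0].finish_reason on the final chunk.
        return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}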
@@ -77,14 +77,16 @@ def handle_prediction_response_streaming(prediction_url, api_token, print_verbose):
    }
    status = ""
    while True and (status not in ["succeeded", "failed", "canceled"]):
        time.sleep(0.0001)
        time.sleep(0.0001) # prevent being rate limited by replicate
        response = requests.get(prediction_url, headers=headers)
        if response.status_code == 200:
            response_data = response.json()
            status = response_data['status']
            print(f"response data: {response_data}")
            if "output" in response_data:
                output_string = "".join(response_data['output'])
                new_output = output_string[len(previous_output):]
                yield new_output
                yield {"output": new_output, "status": status}
                previous_output = output_string
            status = response_data['status']
@@ -485,11 +485,11 @@ def completion(
        # Setting the relevant API KEY for replicate, replicate defaults to using os.environ.get("REPLICATE_API_TOKEN")
        replicate_key = None
        replicate_key = (
            get_secret("REPLICATE_API_KEY")
            or get_secret("REPLICATE_API_TOKEN")
            or api_key
            api_key
            or litellm.replicate_key
            or litellm.api_key
            or litellm.api_key
            or get_secret("REPLICATE_API_KEY")
            or get_secret("REPLICATE_API_TOKEN")
        )

        model_response = replicate.completion(

@@ -575,7 +575,7 @@ def completion(
        if "stream" in optional_params and optional_params["stream"] == True:
            # don't try to access stream object,
            response = CustomStreamWrapper(model_response, model, custom_llm_provider="aleph-alpha", logging_obj=logging)
            response = CustomStreamWrapper(model_response, model, custom_llm_provider="aleph_alpha", logging_obj=logging)
            return response
        response = model_response
    elif model in litellm.openrouter_models or custom_llm_provider == "openrouter":

@@ -769,7 +769,7 @@ def completion(
        if stream:
            model_response = chat.send_message_streaming(prompt, **optional_params)
            response = CustomStreamWrapper(
                model_response, model, custom_llm_provider="vertexai", logging_obj=logging
                model_response, model, custom_llm_provider="vertex_ai", logging_obj=logging
            )
            return response
@@ -643,24 +643,6 @@ def test_completion_sagemaker():

# test_completion_sagemaker()

def test_completion_sagemaker_stream():
    litellm.set_verbose = False
    try:
        response = completion(
            model="sagemaker/jumpstart-dft-meta-textgeneration-llama-2-7b",
            messages=messages,
            temperature=0.2,
            max_tokens=80,
            stream=True,
        )
        # Add any assertions here to check the response
        for chunk in response:
            print(chunk)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")

# test_completion_sagemaker_stream()

def test_completion_bedrock_titan():
    try:
        response = completion(
@@ -9,7 +9,7 @@ sys.path.insert(
    0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm
from litellm import completion, acompletion
from litellm import completion, acompletion, AuthenticationError, InvalidRequestError

litellm.logging = False
litellm.set_verbose = False

@@ -187,6 +187,7 @@ def streaming_format_tests(idx, chunk):
        finished = True
    if "content" in chunk["choices"][0]["delta"]:
        extracted_chunk = chunk["choices"][0]["delta"]["content"]
    print(f"extracted chunk: {extracted_chunk}")
    return extracted_chunk, finished

def test_completion_cohere_stream():
@ -199,21 +200,120 @@ def test_completion_cohere_stream():
|
|||
},
|
||||
]
|
||||
response = completion(
|
||||
model="command-nightly", messages=messages, stream=True, max_tokens=50
|
||||
model="command-nightly", messages=messages, stream=True, max_tokens=50,
|
||||
)
|
||||
complete_response = ""
|
||||
# Add any assertions here to check the response
|
||||
has_finish_reason = False
|
||||
for idx, chunk in enumerate(response):
|
||||
chunk, finished = streaming_format_tests(idx, chunk)
|
||||
has_finish_reason = finished
|
||||
if finished:
|
||||
break
|
||||
complete_response += chunk
|
||||
if has_finish_reason is False:
|
||||
raise Exception("Finish reason not in final chunk")
|
||||
if complete_response.strip() == "":
|
||||
raise Exception("Empty response received")
|
||||
print(f"completion_response: {complete_response}")
|
||||
except Exception as e:
|
||||
pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
# test_completion_cohere_stream()
|
||||
|
||||
def test_completion_cohere_stream_bad_key():
|
||||
try:
|
||||
api_key = "bad-key"
|
||||
messages = [
|
||||
{"role": "system", "content": "You are a helpful assistant."},
|
||||
{
|
||||
"role": "user",
|
||||
"content": "how does a court case get to the Supreme Court?",
|
||||
},
|
||||
]
|
||||
response = completion(
|
||||
model="command-nightly", messages=messages, stream=True, max_tokens=50, api_key=api_key
|
||||
)
|
||||
complete_response = ""
|
||||
# Add any assertions here to check the response
|
||||
has_finish_reason = False
|
||||
for idx, chunk in enumerate(response):
|
||||
chunk, finished = streaming_format_tests(idx, chunk)
|
||||
has_finish_reason = finished
|
||||
if finished:
|
||||
break
|
||||
complete_response += chunk
|
||||
if has_finish_reason is False:
|
||||
raise Exception("Finish reason not in final chunk")
|
||||
if complete_response.strip() == "":
|
||||
raise Exception("Empty response received")
|
||||
print(f"completion_response: {complete_response}")
|
||||
except AuthenticationError as e:
|
||||
pass
|
||||
except Exception as e:
|
||||
pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
# test_completion_cohere_stream_bad_key()
|
||||
|
||||
# def test_completion_nlp_cloud():
|
||||
# try:
|
||||
# messages = [
|
||||
# {"role": "system", "content": "You are a helpful assistant."},
|
||||
# {
|
||||
# "role": "user",
|
||||
# "content": "how does a court case get to the Supreme Court?",
|
||||
# },
|
||||
# ]
|
||||
# response = completion(model="dolphin", messages=messages, stream=True)
|
||||
# complete_response = ""
|
||||
# # Add any assertions here to check the response
|
||||
# has_finish_reason = False
|
||||
# for idx, chunk in enumerate(response):
|
||||
# chunk, finished = streaming_format_tests(idx, chunk)
|
||||
# has_finish_reason = finished
|
||||
# complete_response += chunk
|
||||
# if finished:
|
||||
# break
|
||||
# if has_finish_reason is False:
|
||||
# raise Exception("Finish reason not in final chunk")
|
||||
# if complete_response.strip() == "":
|
||||
# raise Exception("Empty response received")
|
||||
# print(f"completion_response: {complete_response}")
|
||||
# except Exception as e:
|
||||
# pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
# test_completion_nlp_cloud()
|
||||
|
||||
# def test_completion_nlp_cloud_bad_key():
|
||||
# try:
|
||||
# api_key = "bad-key"
|
||||
# messages = [
|
||||
# {"role": "system", "content": "You are a helpful assistant."},
|
||||
# {
|
||||
# "role": "user",
|
||||
# "content": "how does a court case get to the Supreme Court?",
|
||||
# },
|
||||
# ]
|
||||
# response = completion(model="dolphin", messages=messages, stream=True, api_key=api_key)
|
||||
# complete_response = ""
|
||||
# # Add any assertions here to check the response
|
||||
# has_finish_reason = False
|
||||
# for idx, chunk in enumerate(response):
|
||||
# chunk, finished = streaming_format_tests(idx, chunk)
|
||||
# has_finish_reason = finished
|
||||
# complete_response += chunk
|
||||
# if finished:
|
||||
# break
|
||||
# if has_finish_reason is False:
|
||||
# raise Exception("Finish reason not in final chunk")
|
||||
# if complete_response.strip() == "":
|
||||
# raise Exception("Empty response received")
|
||||
# print(f"completion_response: {complete_response}")
|
||||
# except Exception as e:
|
||||
# pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
# test_completion_nlp_cloud_bad_key()
|
||||
|
||||
# def test_completion_hf_stream():
|
||||
# try:
|
||||
# messages = [
|
||||
|
@ -235,10 +335,41 @@ def test_completion_cohere_stream():
|
|||
# if complete_response.strip() == "":
|
||||
# raise Exception("Empty response received")
|
||||
# print(f"completion_response: {complete_response}")
|
||||
# except InvalidRequestError as e:
|
||||
# pass
|
||||
# except Exception as e:
|
||||
# pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
# test_completion_hf_stream()
|
||||
# # test_completion_hf_stream()
|
||||
|
||||
# def test_completion_hf_stream_bad_key():
|
||||
# try:
|
||||
# api_key = "bad-key"
|
||||
# messages = [
|
||||
# {
|
||||
# "content": "Hello! How are you today?",
|
||||
# "role": "user"
|
||||
# },
|
||||
# ]
|
||||
# response = completion(
|
||||
# model="huggingface/meta-llama/Llama-2-7b-chat-hf", messages=messages, api_base="https://a8l9e3ucxinyl3oj.us-east-1.aws.endpoints.huggingface.cloud", stream=True, max_tokens=1000, api_key=api_key
|
||||
# )
|
||||
# complete_response = ""
|
||||
# # Add any assertions here to check the response
|
||||
# for idx, chunk in enumerate(response):
|
||||
# chunk, finished = streaming_format_tests(idx, chunk)
|
||||
# if finished:
|
||||
# break
|
||||
# complete_response += chunk
|
||||
# if complete_response.strip() == "":
|
||||
# raise Exception("Empty response received")
|
||||
# print(f"completion_response: {complete_response}")
|
||||
# except InvalidRequestError as e:
|
||||
# pass
|
||||
# except Exception as e:
|
||||
# pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
# test_completion_hf_stream_bad_key()
|
||||
|
||||
def test_completion_claude_stream():
|
||||
try:
|
||||
|
@ -266,19 +397,22 @@ def test_completion_claude_stream():
|
|||
pytest.fail(f"Error occurred: {e}")
|
||||
# test_completion_claude_stream()
|
||||
|
||||
def test_completion_bedrock_ai21_stream():
|
||||
|
||||
def test_completion_claude_stream_bad_key():
|
||||
try:
|
||||
litellm.set_verbose = False
|
||||
api_key = "bad-key"
|
||||
messages = [
|
||||
{"role": "system", "content": "You are a helpful assistant."},
|
||||
{
|
||||
"role": "user",
|
||||
"content": "how does a court case get to the Supreme Court?",
|
||||
},
|
||||
]
|
||||
response = completion(
|
||||
model="bedrock/amazon.titan-tg1-large",
|
||||
messages=[{"role": "user", "content": "Be as verbose as possible and give as many details as possible, how does a court case get to the Supreme Court?"}],
|
||||
temperature=1,
|
||||
max_tokens=4096,
|
||||
stream=True,
|
||||
model="claude-instant-1", messages=messages, stream=True, max_tokens=50, api_key=api_key
|
||||
)
|
||||
complete_response = ""
|
||||
# Add any assertions here to check the response
|
||||
print(response)
|
||||
complete_response = ""
|
||||
# Add any assertions here to check the response
|
||||
for idx, chunk in enumerate(response):
|
||||
chunk, finished = streaming_format_tests(idx, chunk)
|
||||
if finished:
|
||||
|
@ -286,11 +420,263 @@ def test_completion_bedrock_ai21_stream():
|
|||
complete_response += chunk
|
||||
if complete_response.strip() == "":
|
||||
raise Exception("Empty response received")
|
||||
print(f"completion_response: {complete_response}")
|
||||
except Exception as e:
|
||||
pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
# test_completion_cohere_stream()
|
||||
# test_completion_claude_stream_bad_key()
|
||||
|
||||
def test_completion_replicate_stream():
|
||||
try:
|
||||
messages = [
|
||||
{"role": "system", "content": "You are a helpful assistant."},
|
||||
{
|
||||
"role": "user",
|
||||
"content": "how does a court case get to the Supreme Court?",
|
||||
},
|
||||
]
|
||||
response = completion(
|
||||
model="replicate/meta/llama-2-70b-chat:02e509c789964a7ea8736978a43525956ef40397be9033abf9fd2badfe68c9e3", messages=messages, stream=True, max_tokens=50
|
||||
)
|
||||
complete_response = ""
|
||||
has_finish_reason = False
|
||||
# Add any assertions here to check the response
|
||||
for idx, chunk in enumerate(response):
|
||||
chunk, finished = streaming_format_tests(idx, chunk)
|
||||
has_finish_reason = finished
|
||||
if finished:
|
||||
break
|
||||
complete_response += chunk
|
||||
if has_finish_reason is False:
|
||||
raise Exception("finish reason not set for last chunk")
|
||||
if complete_response.strip() == "":
|
||||
raise Exception("Empty response received")
|
||||
print(f"completion_response: {complete_response}")
|
||||
except InvalidRequestError as e:
|
||||
pass
|
||||
except Exception as e:
|
||||
pytest.fail(f"Error occurred: {e}")
|
||||
# test_completion_replicate_stream()
|
||||
|
||||
# def test_completion_vertexai_stream():
|
||||
# try:
|
||||
# import os
|
||||
# os.environ["VERTEXAI_PROJECT"] = "pathrise-convert-1606954137718"
|
||||
# os.environ["VERTEXAI_LOCATION"] = "us-central1"
|
||||
# messages = [
|
||||
# {"role": "system", "content": "You are a helpful assistant."},
|
||||
# {
|
||||
# "role": "user",
|
||||
# "content": "how does a court case get to the Supreme Court?",
|
||||
# },
|
||||
# ]
|
||||
# response = completion(
|
||||
# model="vertex_ai/chat-bison", messages=messages, stream=True, max_tokens=50
|
||||
# )
|
||||
# complete_response = ""
|
||||
# has_finish_reason = False
|
||||
# # Add any assertions here to check the response
|
||||
# for idx, chunk in enumerate(response):
|
||||
# chunk, finished = streaming_format_tests(idx, chunk)
|
||||
# has_finish_reason = finished
|
||||
# if finished:
|
||||
# break
|
||||
# complete_response += chunk
|
||||
# if has_finish_reason is False:
|
||||
# raise Exception("finish reason not set for last chunk")
|
||||
# if complete_response.strip() == "":
|
||||
# raise Exception("Empty response received")
|
||||
# print(f"completion_response: {complete_response}")
|
||||
# except InvalidRequestError as e:
|
||||
# pass
|
||||
# except Exception as e:
|
||||
# pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
# test_completion_vertexai_stream()
|
||||
|
||||
|
||||
# def test_completion_vertexai_stream_bad_key():
|
||||
# try:
|
||||
# import os
|
||||
# messages = [
|
||||
# {"role": "system", "content": "You are a helpful assistant."},
|
||||
# {
|
||||
# "role": "user",
|
||||
# "content": "how does a court case get to the Supreme Court?",
|
||||
# },
|
||||
# ]
|
||||
# response = completion(
|
||||
# model="vertex_ai/chat-bison", messages=messages, stream=True, max_tokens=50
|
||||
# )
|
||||
# complete_response = ""
|
||||
# has_finish_reason = False
|
||||
# # Add any assertions here to check the response
|
||||
# for idx, chunk in enumerate(response):
|
||||
# chunk, finished = streaming_format_tests(idx, chunk)
|
||||
# has_finish_reason = finished
|
||||
# if finished:
|
||||
# break
|
||||
# complete_response += chunk
|
||||
# if has_finish_reason is False:
|
||||
# raise Exception("finish reason not set for last chunk")
|
||||
# if complete_response.strip() == "":
|
||||
# raise Exception("Empty response received")
|
||||
# print(f"completion_response: {complete_response}")
|
||||
# except InvalidRequestError as e:
|
||||
# pass
|
||||
# except Exception as e:
|
||||
# pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
# test_completion_vertexai_stream_bad_key()
|
||||
|
||||
def test_completion_replicate_stream():
|
||||
try:
|
||||
messages = [
|
||||
{"role": "system", "content": "You are a helpful assistant."},
|
||||
{
|
||||
"role": "user",
|
||||
"content": "how does a court case get to the Supreme Court?",
|
||||
},
|
||||
]
|
||||
response = completion(
|
||||
model="replicate/meta/llama-2-70b-chat:02e509c789964a7ea8736978a43525956ef40397be9033abf9fd2badfe68c9e3", messages=messages, stream=True, max_tokens=50
|
||||
)
|
||||
complete_response = ""
|
||||
has_finish_reason = False
|
||||
# Add any assertions here to check the response
|
||||
for idx, chunk in enumerate(response):
|
||||
chunk, finished = streaming_format_tests(idx, chunk)
|
||||
has_finish_reason = finished
|
||||
if finished:
|
||||
break
|
||||
complete_response += chunk
|
||||
if has_finish_reason is False:
|
||||
raise Exception("finish reason not set for last chunk")
|
||||
if complete_response.strip() == "":
|
||||
raise Exception("Empty response received")
|
||||
print(f"completion_response: {complete_response}")
|
||||
except InvalidRequestError as e:
|
||||
pass
|
||||
except Exception as e:
|
||||
pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
def test_completion_replicate_stream_bad_key():
|
||||
try:
|
||||
api_key = "bad-key"
|
||||
messages = [
|
||||
{"role": "system", "content": "You are a helpful assistant."},
|
||||
{
|
||||
"role": "user",
|
||||
"content": "how does a court case get to the Supreme Court?",
|
||||
},
|
||||
]
|
||||
response = completion(
|
||||
model="replicate/meta/llama-2-70b-chat:02e509c789964a7ea8736978a43525956ef40397be9033abf9fd2badfe68c9e3", messages=messages, stream=True, max_tokens=50, api_key=api_key
|
||||
)
|
||||
complete_response = ""
|
||||
# Add any assertions here to check the response
|
||||
for idx, chunk in enumerate(response):
|
||||
chunk, finished = streaming_format_tests(idx, chunk)
|
||||
if finished:
|
||||
break
|
||||
complete_response += chunk
|
||||
if complete_response.strip() == "":
|
||||
raise Exception("Empty response received")
|
||||
print(f"completion_response: {complete_response}")
|
||||
except InvalidRequestError as e:
|
||||
pass
|
||||
except Exception as e:
|
||||
pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
# test_completion_replicate_stream_bad_key()
|
||||
|
||||
def test_completion_bedrock_ai21_stream():
|
||||
try:
|
||||
response = completion(
|
||||
model="bedrock/amazon.titan-tg1-large",
|
||||
messages=[{"role": "user", "content": "Be as verbose as possible and give as many details as possible, how does a court case get to the Supreme Court?"}],
|
||||
temperature=1,
|
||||
max_tokens=4096,
|
||||
stream=True,
|
||||
)
|
||||
complete_response = ""
|
||||
has_finish_reason = False
|
||||
# Add any assertions here to check the response
|
||||
for idx, chunk in enumerate(response):
|
||||
chunk, finished = streaming_format_tests(idx, chunk)
|
||||
has_finish_reason = finished
|
||||
complete_response += chunk
|
||||
if finished:
|
||||
break
|
||||
if has_finish_reason is False:
|
||||
raise Exception("finish reason not set for last chunk")
|
||||
if complete_response.strip() == "":
|
||||
raise Exception("Empty response received")
|
||||
print(f"completion_response: {complete_response}")
|
||||
except Exception as e:
|
||||
pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
# test_completion_bedrock_ai21_stream()
|
||||
|
||||
def test_completion_bedrock_ai21_stream_bad_key():
|
||||
try:
|
||||
response = completion(
|
||||
model="bedrock/amazon.titan-tg1-large",
|
||||
messages=[{"role": "user", "content": "Be as verbose as possible and give as many details as possible, how does a court case get to the Supreme Court?"}],
|
||||
temperature=1,
|
||||
max_tokens=4096,
|
||||
stream=True,
|
||||
)
|
||||
complete_response = ""
|
||||
has_finish_reason = False
|
||||
# Add any assertions here to check the response
|
||||
for idx, chunk in enumerate(response):
|
||||
chunk, finished = streaming_format_tests(idx, chunk)
|
||||
has_finish_reason = finished
|
||||
if finished:
|
||||
break
|
||||
complete_response += chunk
|
||||
if has_finish_reason is False:
|
||||
raise Exception("finish reason not set for last chunk")
|
||||
if complete_response.strip() == "":
|
||||
raise Exception("Empty response received")
|
||||
print(f"completion_response: {complete_response}")
|
||||
except InvalidRequestError as e:
|
||||
pass
|
||||
except Exception as e:
|
||||
pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
# test_completion_bedrock_ai21_stream_bad_key()
|
||||
|
||||
def test_completion_sagemaker_stream():
|
||||
try:
|
||||
response = completion(
|
||||
model="sagemaker/jumpstart-dft-meta-textgeneration-llama-2-7b",
|
||||
messages=messages,
|
||||
temperature=0.2,
|
||||
max_tokens=80,
|
||||
stream=True,
|
||||
)
|
||||
complete_response = ""
|
||||
has_finish_reason = False
|
||||
# Add any assertions here to check the response
|
||||
for idx, chunk in enumerate(response):
|
||||
chunk, finished = streaming_format_tests(idx, chunk)
|
||||
has_finish_reason = finished
|
||||
if finished:
|
||||
break
|
||||
complete_response += chunk
|
||||
if has_finish_reason is False:
|
||||
raise Exception("finish reason not set for last chunk")
|
||||
if complete_response.strip() == "":
|
||||
raise Exception("Empty response received")
|
||||
except InvalidRequestError as e:
|
||||
pass
|
||||
except Exception as e:
|
||||
pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
test_completion_sagemaker_stream()
|
||||
|
||||
# test on openai completion call
|
||||
def test_openai_text_completion_call():
|
||||
|
@ -314,7 +700,33 @@ def test_openai_text_completion_call():
|
|||
def ai21_completion_call():
|
||||
try:
|
||||
response = completion(
|
||||
model="j2-ultra", messages=messages, stream=True, logger_fn=logger_fn
|
||||
model="j2-ultra", messages=messages, stream=True
|
||||
)
|
||||
print(f"response: {response}")
|
||||
has_finished = False
|
||||
complete_response = ""
|
||||
start_time = time.time()
|
||||
for idx, chunk in enumerate(response):
|
||||
chunk, finished = streaming_format_tests(idx, chunk)
|
||||
has_finished = finished
|
||||
complete_response += chunk
|
||||
if finished:
|
||||
break
|
||||
if has_finished is False:
|
||||
raise Exception("finished reason missing from final chunk")
|
||||
if complete_response.strip() == "":
|
||||
raise Exception("Empty response received")
|
||||
print(f"completion_response: {complete_response}")
|
||||
except:
|
||||
pytest.fail(f"error occurred: {traceback.format_exc()}")
|
||||
|
||||
# ai21_completion_call()
|
||||
|
||||
def ai21_completion_call_bad_key():
|
||||
try:
|
||||
api_key = "bad-key"
|
||||
response = completion(
|
||||
model="j2-ultra", messages=messages, stream=True, api_key=api_key
|
||||
)
|
||||
print(f"response: {response}")
|
||||
complete_response = ""
|
||||
|
@ -327,10 +739,64 @@ def ai21_completion_call():
|
|||
if complete_response.strip() == "":
|
||||
raise Exception("Empty response received")
|
||||
print(f"completion_response: {complete_response}")
|
||||
except InvalidRequestError as e:
|
||||
pass
|
||||
except:
|
||||
pytest.fail(f"error occurred: {traceback.format_exc()}")
|
||||
|
||||
# ai21_completion_call()
|
||||
# ai21_completion_call_bad_key()
|
||||
|
||||
def test_completion_aleph_alpha():
|
||||
try:
|
||||
response = completion(
|
||||
model="luminous-base", messages=messages, stream=True
|
||||
)
|
||||
# Add any assertions here to check the response
|
||||
has_finished = False
|
||||
complete_response = ""
|
||||
start_time = time.time()
|
||||
for idx, chunk in enumerate(response):
|
||||
chunk, finished = streaming_format_tests(idx, chunk)
|
||||
has_finished = finished
|
||||
complete_response += chunk
|
||||
if finished:
|
||||
break
|
||||
if has_finished is False:
|
||||
raise Exception("finished reason missing from final chunk")
|
||||
if complete_response.strip() == "":
|
||||
raise Exception("Empty response received")
|
||||
except Exception as e:
|
||||
pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
# test_completion_aleph_alpha()
|
||||
|
||||
# def test_completion_aleph_alpha_bad_key():
|
||||
# try:
|
||||
# api_key = "bad-key"
|
||||
# response = completion(
|
||||
# model="luminous-base", messages=messages, stream=True, api_key=api_key
|
||||
# )
|
||||
# # Add any assertions here to check the response
|
||||
# has_finished = False
|
||||
# complete_response = ""
|
||||
# start_time = time.time()
|
||||
# for idx, chunk in enumerate(response):
|
||||
# chunk, finished = streaming_format_tests(idx, chunk)
|
||||
# has_finished = finished
|
||||
# complete_response += chunk
|
||||
# if finished:
|
||||
# break
|
||||
# if has_finished is False:
|
||||
# raise Exception("finished reason missing from final chunk")
|
||||
# if complete_response.strip() == "":
|
||||
# raise Exception("Empty response received")
|
||||
# except InvalidRequestError as e:
|
||||
# pass
|
||||
# except Exception as e:
|
||||
# pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
# test_completion_aleph_alpha_bad_key()
|
||||
|
||||
# test on openai completion call
|
||||
def test_openai_chat_completion_call():
|
||||
try:
|
||||
|
@ -366,11 +832,15 @@ def test_together_ai_completion_call_starcoder():
|
|||
)
|
||||
complete_response = ""
|
||||
print(f"returned response object: {response}")
|
||||
has_finish_reason = False
|
||||
for idx, chunk in enumerate(response):
|
||||
chunk, finished = streaming_format_tests(idx, chunk)
|
||||
has_finish_reason = finished
|
||||
if finished:
|
||||
break
|
||||
complete_response += chunk
|
||||
if has_finish_reason is False:
|
||||
raise Exception("Finish reason not set for last chunk")
|
||||
if complete_response == "":
|
||||
raise Exception("Empty response received")
|
||||
print(f"complete response: {complete_response}")
|
||||
|
@ -378,6 +848,38 @@ def test_together_ai_completion_call_starcoder():
|
|||
print(f"error occurred: {traceback.format_exc()}")
|
||||
pass
|
||||
|
||||
# test_together_ai_completion_call_starcoder()
|
||||
|
||||
def test_together_ai_completion_call_starcoder_bad_key():
|
||||
try:
|
||||
api_key = "bad-key"
|
||||
start_time = time.time()
|
||||
response = completion(
|
||||
model="together_ai/bigcode/starcoder",
|
||||
messages=messages,
|
||||
stream=True,
|
||||
api_key=api_key
|
||||
)
|
||||
complete_response = ""
|
||||
has_finish_reason = False
|
||||
for idx, chunk in enumerate(response):
|
||||
chunk, finished = streaming_format_tests(idx, chunk)
|
||||
has_finish_reason = finished
|
||||
if finished:
|
||||
break
|
||||
complete_response += chunk
|
||||
if has_finish_reason is False:
|
||||
raise Exception("Finish reason not set for last chunk")
|
||||
if complete_response == "":
|
||||
raise Exception("Empty response received")
|
||||
print(f"complete response: {complete_response}")
|
||||
except InvalidRequestError as e:
|
||||
pass
|
||||
except:
|
||||
print(f"error occurred: {traceback.format_exc()}")
|
||||
pass
|
||||
|
||||
# test_together_ai_completion_call_starcoder_bad_key()
|
||||
#### Test Function calling + streaming ####
|
||||
|
||||
def test_completion_openai_with_functions():
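The bad-key tests above exercise the other half of this commit: provider auth failures raised mid-stream are now routed through exception_type() and surface as litellm's mapped exception classes. A hedged sketch of what a caller can now rely on (mirrors test_completion_claude_stream_bad_key; the key is deliberately invalid):

    from litellm import completion, AuthenticationError

    try:
        for chunk in completion(
            model="claude-instant-1",
            messages=[{"role": "user", "content": "hi"}],
            stream=True,
            max_tokens=50,
            api_key="bad-key",
        ):
            print(chunk)
    except AuthenticationError as e:
        # the raw Anthropic "Invalid API Key" error now arrives as a mapped exception
        print(f"mapped exception: {e}")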
litellm/utils.py (294 changed lines)

@@ -2,6 +2,7 @@ import sys
import dotenv, json, traceback, threading
import subprocess, os
import litellm, openai
import itertools
import random, uuid, requests
import datetime, time
import tiktoken

@@ -1915,7 +1916,6 @@ def exception_type(
):
    global user_logger_fn, liteDebuggerClient
    exception_mapping_worked = False

    if litellm.set_verbose == True:
        litellm.error_logs['EXCEPTION'] = original_exception
        litellm.error_logs['KWARGS'] = completion_kwargs

@@ -1970,7 +1970,7 @@ def exception_type(
                exception_type = type(original_exception).__name__
            else:
                exception_type = ""
            if "claude" in model: # one of the anthropics
            if custom_llm_provider == "anthropic": # one of the anthropics
                if hasattr(original_exception, "message"):
                    if "prompt is too long" in original_exception.message:
                        exception_mapping_worked = True

@@ -1979,6 +1979,13 @@ def exception_type(
                            model=model,
                            llm_provider="anthropic"
                        )
                    if "Invalid API Key" in original_exception.message:
                        exception_mapping_worked = True
                        raise AuthenticationError(
                            message=original_exception.message,
                            model=model,
                            llm_provider="anthropic"
                        )
                if hasattr(original_exception, "status_code"):
                    print_verbose(f"status_code: {original_exception.status_code}")
                    if original_exception.status_code == 401:
@@ -2031,7 +2038,7 @@ def exception_type(
                        llm_provider="anthropic",
                        model=model
                    )
            elif "replicate" in model:
            elif custom_llm_provider == "replicate":
                if "Incorrect authentication token" in error_str:
                    exception_mapping_worked = True
                    raise AuthenticationError(

@@ -2068,7 +2075,7 @@ def exception_type(
                        llm_provider="replicate",
                        model=model
                    )
                elif original_exception.status_code == 400:
                elif original_exception.status_code == 400 or original_exception.status_code == 422:
                    exception_mapping_worked = True
                    raise InvalidRequestError(
                        message=f"ReplicateException - {original_exception.message}",

@@ -2110,7 +2117,31 @@ def exception_type(
                        llm_provider="replicate",
                        model=model
                    )
            elif model in litellm.cohere_models or custom_llm_provider == "cohere": # Cohere
            elif custom_llm_provider == "bedrock":
                if "Unable to locate credentials" in error_str:
                    exception_mapping_worked = True
                    raise InvalidRequestError(
                        message=f"BedrockException - {error_str}",
                        model=model,
                        llm_provider="bedrock"
                    )
            elif custom_llm_provider == "sagemaker":
                if "Unable to locate credentials" in error_str:
                    exception_mapping_worked = True
                    raise InvalidRequestError(
                        message=f"SagemakerException - {error_str}",
                        model=model,
                        llm_provider="sagemaker"
                    )
            elif custom_llm_provider == "vertex_ai":
                if "Vertex AI API has not been used in project" in error_str or "Unable to find your project" in error_str:
                    exception_mapping_worked = True
                    raise InvalidRequestError(
                        message=f"VertexAIException - {error_str}",
                        model=model,
                        llm_provider="vertex_ai"
                    )
            elif custom_llm_provider == "cohere": # Cohere
                if (
                    "invalid api token" in error_str
                    or "No API key provided." in error_str
@ -2184,6 +2215,13 @@ def exception_type(
|
|||
model=model,
|
||||
llm_provider="huggingface"
|
||||
)
|
||||
elif "A valid user token is required" in error_str:
|
||||
exception_mapping_worked = True
|
||||
raise InvalidRequestError(
|
||||
message=error_str,
|
||||
llm_provider="huggingface",
|
||||
model=model
|
||||
)
|
||||
if hasattr(original_exception, "status_code"):
|
||||
if original_exception.status_code == 401:
|
||||
exception_mapping_worked = True
|
||||
|
@ -2221,6 +2259,8 @@ def exception_type(
|
|||
llm_provider="huggingface",
|
||||
model=model
|
||||
)
|
||||
exception_mapping_worked = True
|
||||
raise APIError(status_code=500, message=error_str, model=model, llm_provider=custom_llm_provider)
|
||||
elif custom_llm_provider == "ai21":
|
||||
if hasattr(original_exception, "message"):
|
||||
if "Prompt has too many tokens" in original_exception.message:
|
||||
|
@ -2230,6 +2270,13 @@ def exception_type(
|
|||
model=model,
|
||||
llm_provider="ai21"
|
||||
)
|
||||
if "Bad or missing API token." in original_exception.message:
|
||||
exception_mapping_worked = True
|
||||
raise InvalidRequestError(
|
||||
message=f"AI21Exception - {original_exception.message}",
|
||||
model=model,
|
||||
llm_provider="ai21"
|
||||
)
|
||||
if hasattr(original_exception, "status_code"):
|
||||
if original_exception.status_code == 401:
|
||||
exception_mapping_worked = True
|
||||
|
@ -2266,7 +2313,7 @@ def exception_type(
|
|||
llm_provider="ai21",
|
||||
model=model
|
||||
)
|
||||
elif model in litellm.nlp_cloud_models or custom_llm_provider == "nlp_cloud":
|
||||
elif custom_llm_provider == "nlp_cloud":
|
||||
if "detail" in error_str:
|
||||
if "Input text length should not exceed" in error_str:
|
||||
exception_mapping_worked = True
|
||||
|
@ -2342,6 +2389,7 @@ def exception_type(
|
|||
model=model
|
||||
)
|
||||
elif custom_llm_provider == "together_ai":
|
||||
import json
|
||||
error_response = json.loads(error_str)
|
||||
if "error" in error_response and "`inputs` tokens + `max_new_tokens` must be <=" in error_response["error"]:
|
||||
exception_mapping_worked = True
|
||||
|
@ -2364,6 +2412,13 @@ def exception_type(
|
|||
model=model,
|
||||
llm_provider="together_ai"
|
||||
)
|
||||
elif "error" in error_response and "API key doesn't match expected format." in error_response["error"]:
|
||||
exception_mapping_worked = True
|
||||
raise InvalidRequestError(
|
||||
message=f"TogetherAIException - {error_response['error']}",
|
||||
model=model,
|
||||
llm_provider="together_ai"
|
||||
)
|
||||
elif "error_type" in error_response and error_response["error_type"] == "validation":
|
||||
exception_mapping_worked = True
|
||||
raise InvalidRequestError(
|
||||
|
@ -2393,7 +2448,7 @@ def exception_type(
|
|||
llm_provider="together_ai",
|
||||
model=model
|
||||
)
|
||||
elif model in litellm.aleph_alpha_models:
|
||||
elif custom_llm_provider == "aleph_alpha":
|
||||
if "This is longer than the model's maximum context length" in error_str:
|
||||
exception_mapping_worked = True
|
||||
raise ContextWindowExceededError(
|
||||
|
@ -2401,6 +2456,13 @@ def exception_type(
|
|||
llm_provider="aleph_alpha",
|
||||
model=model
|
||||
)
|
||||
elif "InvalidToken" in error_str or "No token provided" in error_str:
|
||||
exception_mapping_worked = True
|
||||
raise InvalidRequestError(
|
||||
message=f"AlephAlphaException - {original_exception.message}",
|
||||
llm_provider="aleph_alpha",
|
||||
model=model
|
||||
)
|
||||
elif hasattr(original_exception, "status_code"):
|
||||
print(f"status code: {original_exception.status_code}")
|
||||
if original_exception.status_code == 401:
|
||||
|
@ -2445,7 +2507,8 @@ def exception_type(
|
|||
elif custom_llm_provider == "ollama":
|
||||
if "no attribute 'async_get_ollama_response_stream" in error_str:
|
||||
raise ImportError("Import error - trying to use async for ollama. import async_generator failed. Try 'pip install async_generator'")
|
||||
raise original_exception
|
||||
exception_mapping_worked = True
|
||||
raise APIError(status_code=500, message=str(original_exception), llm_provider=custom_llm_provider, model=model)
|
||||
except Exception as e:
|
||||
# LOGGING
|
||||
exception_logging(
|
||||
|
@ -2563,6 +2626,7 @@ class CustomStreamWrapper:
|
|||
self.logging_obj = logging_obj
|
||||
self.completion_stream = completion_stream
|
||||
self.sent_first_chunk = False
|
||||
self.sent_last_chunk = False
|
||||
if self.logging_obj:
|
||||
# Log the type of the received item
|
||||
self.logging_obj.post_call(str(type(completion_stream)))
|
||||
|
@ -2579,41 +2643,71 @@ class CustomStreamWrapper:
|
|||
|
||||
def handle_anthropic_chunk(self, chunk):
|
||||
str_line = chunk.decode("utf-8") # Convert bytes to string
|
||||
print(f"str_line: {str_line}")
|
||||
text = ""
|
||||
is_finished = False
|
||||
finish_reason = None
|
||||
if str_line.startswith("data:"):
|
||||
data_json = json.loads(str_line[5:])
|
||||
return data_json.get("completion", "")
|
||||
return ""
|
||||
text = data_json.get("completion", "")
|
||||
if data_json.get("stop_reason", None):
|
||||
is_finished = True
|
||||
finish_reason = data_json["stop_reason"]
|
||||
return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}
|
||||
elif "error" in str_line:
|
||||
raise ValueError(f"Unable to parse response. Original response: {str_line}")
|
||||
else:
|
||||
return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}
|
||||
|
||||
def handle_together_ai_chunk(self, chunk):
|
||||
chunk = chunk.decode("utf-8")
|
||||
text_index = chunk.find('"text":"') # this checks if text: exists
|
||||
text_start = text_index + len('"text":"')
|
||||
text_end = chunk.find('"}', text_start)
|
||||
if text_index != -1 and text_end != -1:
|
||||
extracted_text = chunk[text_start:text_end]
|
||||
return extracted_text
|
||||
text = ""
|
||||
is_finished = False
|
||||
finish_reason = None
|
||||
if "text" in chunk:
|
||||
text_index = chunk.find('"text":"') # this checks if text: exists
|
||||
text_start = text_index + len('"text":"')
|
||||
text_end = chunk.find('"}', text_start)
|
||||
if text_index != -1 and text_end != -1:
|
||||
extracted_text = chunk[text_start:text_end]
|
||||
text = extracted_text
|
||||
return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}
|
||||
elif "[DONE]" in chunk:
|
||||
return {"text": text, "is_finished": True, "finish_reason": "stop"}
|
||||
elif "error" in chunk:
|
||||
raise ValueError(chunk)
|
||||
else:
|
||||
return ""
|
||||
return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}
|
||||
|
||||
def handle_huggingface_chunk(self, chunk):
|
||||
chunk = chunk.decode("utf-8")
|
||||
text = ""
|
||||
is_finished = False
|
||||
finish_reason = ""
|
||||
if chunk.startswith("data:"):
|
||||
data_json = json.loads(chunk[5:])
|
||||
print(f"data json: {data_json}")
|
||||
if "token" in data_json and "text" in data_json["token"]:
|
||||
text = data_json["token"]["text"]
|
||||
if "meta-llama/Llama-2" in self.model: #clean eos tokens like </s> from the returned output text
|
||||
if any(token in text for token in llama_2_special_tokens):
|
||||
text = text.replace("<s>", "").replace("</s>", "")
|
||||
return text
|
||||
else:
|
||||
return ""
|
||||
return ""
|
||||
if data_json.get("details", False) and data_json["details"].get("finish_reason", False):
|
||||
is_finished = True
|
||||
finish_reason = data_json["details"]["finish_reason"]
|
||||
return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}
|
||||
elif "error" in chunk:
|
||||
raise ValueError(chunk)
|
||||
return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}
|
||||
|
||||
def handle_ai21_chunk(self, chunk):
|
||||
def handle_ai21_chunk(self, chunk): # fake streaming
|
||||
chunk = chunk.decode("utf-8")
|
||||
data_json = json.loads(chunk)
|
||||
try:
|
||||
return data_json["completions"][0]["data"]["text"]
|
||||
text = data_json["completions"][0]["data"]["text"]
|
||||
is_finished = True
|
||||
finish_reason = "stop"
|
||||
return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}
|
||||
except:
|
||||
raise ValueError(f"Unable to parse response. Original response: {chunk}")
|
||||
|
||||
|
@ -2621,8 +2715,10 @@ class CustomStreamWrapper:
|
|||
chunk = chunk.decode("utf-8")
|
||||
data_json = json.loads(chunk)
|
||||
try:
|
||||
print(f"data json: {data_json}")
|
||||
return data_json["generated_text"]
|
||||
text = data_json["generated_text"]
|
||||
is_finished = True
|
||||
finish_reason = "stop"
|
||||
return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}
|
||||
except:
|
||||
raise ValueError(f"Unable to parse response. Original response: {chunk}")
|
||||
|
||||
|
@ -2630,7 +2726,10 @@ class CustomStreamWrapper:
|
|||
chunk = chunk.decode("utf-8")
|
||||
data_json = json.loads(chunk)
|
||||
try:
|
||||
return data_json["completions"][0]["completion"]
|
||||
text = data_json["completions"][0]["completion"]
|
||||
is_finished = True
|
||||
finish_reason = "stop"
|
||||
return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}
|
||||
except:
|
||||
raise ValueError(f"Unable to parse response. Original response: {chunk}")
|
||||
|
||||
|
@ -2638,7 +2737,35 @@ class CustomStreamWrapper:
|
|||
chunk = chunk.decode("utf-8")
|
||||
data_json = json.loads(chunk)
|
||||
try:
|
||||
return data_json["text"]
|
||||
text = ""
|
||||
is_finished = False
|
||||
finish_reason = ""
|
||||
if "text" in data_json:
|
||||
text = data_json["text"]
|
||||
elif "is_finished" in data_json:
|
||||
is_finished = data_json["is_finished"]
|
||||
finish_reason = data_json["finish_reason"]
|
||||
else:
|
||||
raise Exception(data_json)
|
||||
return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}
|
||||
except:
|
||||
raise ValueError(f"Unable to parse response. Original response: {chunk}")
|
||||
|
||||
def handle_replicate_chunk(self, chunk):
|
||||
print(f"chunk: {chunk}")
|
||||
try:
|
||||
text = ""
|
||||
is_finished = False
|
||||
finish_reason = ""
|
||||
if "output" in chunk:
|
||||
text = chunk['output']
|
||||
if "status" in chunk:
|
||||
if chunk["status"] == "succeeded":
|
||||
is_finished = True
|
||||
finish_reason = "stop"
|
||||
elif chunk.get("error", None):
|
||||
raise Exception(chunk["error"])
|
||||
return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}
|
||||
except:
|
||||
raise ValueError(f"Unable to parse response. Original response: {chunk}")
|
||||
|
||||
|
@ -2683,13 +2810,21 @@ class CustomStreamWrapper:
|
|||
traceback.print_exc()
|
||||
return ""
|
||||
|
||||
def handle_bedrock_stream(self):
|
||||
if self.completion_stream:
|
||||
event = next(self.completion_stream)
|
||||
chunk = event.get('chunk')
|
||||
if chunk:
|
||||
chunk_data = json.loads(chunk.get('bytes').decode())
|
||||
return chunk_data['outputText']
|
||||
def handle_bedrock_stream(self, chunk):
|
||||
chunk = chunk.get('chunk')
|
||||
if chunk:
|
||||
chunk_data = json.loads(chunk.get('bytes').decode())
|
||||
text = ""
|
||||
is_finished = False
|
||||
finish_reason = ""
|
||||
if "outputText" in chunk_data:
|
||||
text = chunk_data['outputText']
|
||||
if chunk_data.get("completionReason", None):
|
||||
is_finished = True
|
||||
finish_reason = chunk_data["completionReason"]
|
||||
elif chunk.get("error", None):
|
||||
raise Exception(chunk["error"])
|
||||
return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}
|
||||
return ""
|
||||
|
||||
## needs to handle the empty string case (even starting chunk can be an empty string)
|
||||
|
@ -2701,49 +2836,94 @@ class CustomStreamWrapper:
|
|||
completion_obj = {"content": ""}
|
||||
if self.custom_llm_provider and self.custom_llm_provider == "anthropic":
|
||||
chunk = next(self.completion_stream)
|
||||
completion_obj["content"] = self.handle_anthropic_chunk(chunk)
|
||||
response_obj = self.handle_anthropic_chunk(chunk)
|
||||
completion_obj["content"] = response_obj["text"]
|
||||
if response_obj["is_finished"]:
|
||||
model_response.choices[0].finish_reason = response_obj["finish_reason"]
|
||||
elif self.model == "replicate" or self.custom_llm_provider == "replicate":
|
||||
chunk = next(self.completion_stream)
|
||||
completion_obj["content"] = chunk
|
||||
response_obj = self.handle_replicate_chunk(chunk)
|
||||
completion_obj["content"] = response_obj["text"]
|
||||
if response_obj["is_finished"]:
|
||||
model_response.choices[0].finish_reason = response_obj["finish_reason"]
|
||||
elif (
|
||||
self.custom_llm_provider and self.custom_llm_provider == "together_ai"):
|
||||
chunk = next(self.completion_stream)
|
||||
text_data = self.handle_together_ai_chunk(chunk)
|
||||
if text_data == "":
|
||||
return self.__next__()
|
||||
completion_obj["content"] = text_data
|
||||
response_obj = self.handle_together_ai_chunk(chunk)
|
||||
completion_obj["content"] = response_obj["text"]
|
||||
if response_obj["is_finished"]:
|
||||
model_response.choices[0].finish_reason = response_obj["finish_reason"]
|
||||
elif self.custom_llm_provider and self.custom_llm_provider == "huggingface":
|
||||
chunk = next(self.completion_stream)
|
||||
completion_obj["content"] = self.handle_huggingface_chunk(chunk)
|
||||
response_obj = self.handle_huggingface_chunk(chunk)
|
||||
completion_obj["content"] = response_obj["text"]
|
||||
if response_obj["is_finished"]:
|
||||
model_response.choices[0].finish_reason = response_obj["finish_reason"]
|
||||
elif self.custom_llm_provider and self.custom_llm_provider == "baseten": # baseten doesn't provide streaming
|
||||
chunk = next(self.completion_stream)
|
||||
completion_obj["content"] = self.handle_baseten_chunk(chunk)
|
||||
elif self.custom_llm_provider and self.custom_llm_provider == "ai21": #ai21 doesn't provide streaming
|
||||
chunk = next(self.completion_stream)
|
||||
completion_obj["content"] = self.handle_ai21_chunk(chunk)
|
||||
response_obj = self.handle_ai21_chunk(chunk)
|
||||
completion_obj["content"] = response_obj["text"]
|
||||
if response_obj["is_finished"]:
|
||||
model_response.choices[0].finish_reason = response_obj["finish_reason"]
|
||||
elif self.custom_llm_provider and self.custom_llm_provider == "vllm":
|
||||
chunk = next(self.completion_stream)
|
||||
completion_obj["content"] = chunk[0].outputs[0].text
|
||||
elif self.custom_llm_provider and self.custom_llm_provider == "aleph-alpha": #aleph alpha doesn't provide streaming
|
||||
elif self.custom_llm_provider and self.custom_llm_provider == "aleph_alpha": #aleph alpha doesn't provide streaming
|
||||
chunk = next(self.completion_stream)
|
||||
completion_obj["content"] = self.handle_aleph_alpha_chunk(chunk)
|
||||
response_obj = self.handle_aleph_alpha_chunk(chunk)
|
||||
completion_obj["content"] = response_obj["text"]
|
||||
if response_obj["is_finished"]:
|
||||
model_response.choices[0].finish_reason = response_obj["finish_reason"]
|
||||
elif self.custom_llm_provider and self.custom_llm_provider == "text-completion-openai":
|
||||
chunk = next(self.completion_stream)
|
||||
completion_obj["content"] = self.handle_openai_text_completion_chunk(chunk)
|
||||
elif self.model in litellm.nlp_cloud_models or self.custom_llm_provider == "nlp_cloud":
|
||||
chunk = next(self.completion_stream)
|
||||
completion_obj["content"] = self.handle_nlp_cloud_chunk(chunk)
|
||||
elif self.model in (litellm.vertex_chat_models + litellm.vertex_code_chat_models + litellm.vertex_text_models + litellm.vertex_code_text_models):
|
||||
chunk = next(self.completion_stream)
|
||||
completion_obj["content"] = str(chunk)
|
||||
try:
|
||||
chunk = next(self.completion_stream)
|
||||
response_obj = self.handle_nlp_cloud_chunk(chunk)
|
||||
completion_obj["content"] = response_obj["text"]
|
||||
if response_obj["is_finished"]:
|
||||
model_response.choices[0].finish_reason = response_obj["finish_reason"]
|
||||
except Exception as e:
|
||||
if self.sent_last_chunk:
|
||||
raise e
|
||||
else:
|
||||
if self.sent_first_chunk is False:
|
||||
raise Exception("An unknown error occurred with the stream")
|
||||
model_response.choices[0].finish_reason = "stop"
|
||||
self.sent_last_chunk = True
|
||||
elif self.custom_llm_provider and self.custom_llm_provider == "vertex_ai":
|
||||
try:
|
||||
chunk = next(self.completion_stream)
|
||||
completion_obj["content"] = str(chunk)
|
||||
except StopIteration as e:
|
||||
if self.sent_last_chunk:
|
||||
raise e
|
||||
else:
|
||||
model_response.choices[0].finish_reason = "stop"
|
||||
self.sent_last_chunk = True
|
||||
elif self.custom_llm_provider == "cohere":
|
||||
chunk = next(self.completion_stream)
|
||||
completion_obj["content"] = self.handle_cohere_chunk(chunk)
|
||||
response_obj = self.handle_cohere_chunk(chunk)
|
||||
completion_obj["content"] = response_obj["text"]
|
||||
if response_obj["is_finished"]:
|
||||
model_response.choices[0].finish_reason = response_obj["finish_reason"]
|
||||
elif self.custom_llm_provider == "bedrock":
|
||||
completion_obj["content"] = self.handle_bedrock_stream()
|
||||
chunk = next(self.completion_stream)
|
||||
response_obj = self.handle_bedrock_stream(chunk)
|
||||
completion_obj["content"] = response_obj["text"]
|
||||
if response_obj["is_finished"]:
|
||||
model_response.choices[0].finish_reason = response_obj["finish_reason"]
|
||||
elif self.custom_llm_provider == "sagemaker":
|
||||
if len(self.completion_stream)==0:
|
||||
raise StopIteration
|
||||
if self.sent_last_chunk:
|
||||
raise StopIteration
|
||||
else:
|
||||
model_response.choices[0].finish_reason = "stop"
|
||||
self.sent_last_chunk = True
|
||||
chunk_size = 30
|
||||
new_chunk = self.completion_stream[:chunk_size]
|
||||
completion_obj["content"] = new_chunk
|
||||
|
@@ -2765,11 +2945,13 @@ class CustomStreamWrapper:
                    self.sent_first_chunk = True
                model_response.choices[0].delta = Delta(**completion_obj)
                return model_response
            elif model_response.choices[0].finish_reason:
                return model_response
        except StopIteration:
            raise StopIteration
        except Exception as e:
            model_response.choices[0].finish_reason = "stop"
            return model_response
        except Exception as e:
            e.message = str(e)
            return exception_type(model=self.model, custom_llm_provider=self.custom_llm_provider, original_exception=e)

    async def __anext__(self):
        try:
@ -2796,7 +2978,6 @@ def read_config_args(config_path) -> dict:
|
|||
# read keys/ values from config file and return them
|
||||
return config
|
||||
except Exception as e:
|
||||
print("An error occurred while reading config:", str(e))
|
||||
raise e
|
||||
|
||||
########## experimental completion variants ############################
|
||||
|
@ -2899,7 +3080,6 @@ def get_model_split_test(models, completion_call_id):
|
|||
try:
|
||||
# make the api call
|
||||
last_fetched_at = time.time()
|
||||
print(f"last_fetched_at: {last_fetched_at}")
|
||||
response = requests.post(
|
||||
#http://api.litellm.ai
|
||||
url="http://api.litellm.ai/get_model_split_test", # get the updated dict from table or update the table with the dict
|
||||
pyproject.toml

@@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
version = "0.1.738"
version = "0.1.739"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT License"
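Taken together, the streaming changes above mean a caller can read the provider's finish reason off the final streamed chunk. A short, hedged sketch of consuming a stream this way (model and access pattern mirror the tests in this diff):

    from litellm import completion

    response = completion(
        model="command-nightly",
        messages=[{"role": "user", "content": "hello"}],
        stream=True,
        max_tokens=50,
    )
    full_text = ""
    finish_reason = None
    for chunk in response:
        delta = chunk["choices"][0]["delta"]
        if "content" in delta:
            full_text += delta["content"]
        if chunk["choices"][0]["finish_reason"]:  # only set on the final chunk
            finish_reason = chunk["choices"][0]["finish_reason"]
    print(full_text)
    print(finish_reason)  # e.g. "stop"; exact value depends on the provider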