Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-26 03:04:13 +00:00)

commit 889679a0dd (parent f984e5f380)
fix exception mapping for streaming

8 changed files with 766 additions and 100 deletions

Binary file not shown.
Binary file not shown.
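For orientation before the per-file hunks: this commit makes every provider-specific chunk handler in CustomStreamWrapper return a small dict (text, is_finished, finish_reason) instead of a bare string, so the wrapper can set finish_reason on the final streamed chunk, and it switches exception mapping from model-name checks to custom_llm_provider checks. The sketch below illustrates that handler contract; handle_anthropic_chunk mirrors the code added in litellm/utils.py further down, while stream_deltas is only an illustrative consumer and is not part of the commit.

import json

def handle_anthropic_chunk(chunk: bytes) -> dict:
    # Mirrors the handler in the utils.py hunk below: parse one SSE line and
    # report both the text delta and whether the provider signalled completion.
    str_line = chunk.decode("utf-8")
    text, is_finished, finish_reason = "", False, None
    if str_line.startswith("data:"):
        data_json = json.loads(str_line[5:])
        text = data_json.get("completion", "")
        if data_json.get("stop_reason", None):
            is_finished = True
            finish_reason = data_json["stop_reason"]
    return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}

def stream_deltas(raw_chunks):
    # Illustrative consumer (assumed, not in the diff): the wrapper copies
    # finish_reason onto the last OpenAI-style delta it yields.
    for raw in raw_chunks:
        parsed = handle_anthropic_chunk(raw)
        delta = {"choices": [{"delta": {"content": parsed["text"]}, "finish_reason": None}]}
        if parsed["is_finished"]:
            delta["choices"][0]["finish_reason"] = parsed["finish_reason"]
        yield delta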
@@ -77,14 +77,16 @@ def handle_prediction_response_streaming(prediction_url, api_token, print_verbose):
    }
    status = ""
    while True and (status not in ["succeeded", "failed", "canceled"]):
-        time.sleep(0.0001)
+        time.sleep(0.0001)  # prevent being rate limited by replicate
        response = requests.get(prediction_url, headers=headers)
        if response.status_code == 200:
            response_data = response.json()
+            status = response_data['status']
+            print(f"response data: {response_data}")
            if "output" in response_data:
                output_string = "".join(response_data['output'])
                new_output = output_string[len(previous_output):]
-                yield new_output
+                yield {"output": new_output, "status": status}
                previous_output = output_string
            status = response_data['status']

@@ -485,11 +485,11 @@ def completion(
        # Setting the relevant API KEY for replicate, replicate defaults to using os.environ.get("REPLICATE_API_TOKEN")
        replicate_key = None
        replicate_key = (
-            get_secret("REPLICATE_API_KEY")
-            or get_secret("REPLICATE_API_TOKEN")
-            or api_key
+            api_key
            or litellm.replicate_key
            or litellm.api_key
+            or get_secret("REPLICATE_API_KEY")
+            or get_secret("REPLICATE_API_TOKEN")
        )

        model_response = replicate.completion(
@@ -575,7 +575,7 @@ def completion(

        if "stream" in optional_params and optional_params["stream"] == True:
            # don't try to access stream object,
-            response = CustomStreamWrapper(model_response, model, custom_llm_provider="aleph-alpha", logging_obj=logging)
+            response = CustomStreamWrapper(model_response, model, custom_llm_provider="aleph_alpha", logging_obj=logging)
            return response
        response = model_response
    elif model in litellm.openrouter_models or custom_llm_provider == "openrouter":
@@ -769,7 +769,7 @@ def completion(
        if stream:
            model_response = chat.send_message_streaming(prompt, **optional_params)
            response = CustomStreamWrapper(
-                model_response, model, custom_llm_provider="vertexai", logging_obj=logging
+                model_response, model, custom_llm_provider="vertex_ai", logging_obj=logging
            )
            return response

@@ -643,24 +643,6 @@ def test_completion_sagemaker():

# test_completion_sagemaker()

-def test_completion_sagemaker_stream():
-    litellm.set_verbose = False
-    try:
-        response = completion(
-            model="sagemaker/jumpstart-dft-meta-textgeneration-llama-2-7b",
-            messages=messages,
-            temperature=0.2,
-            max_tokens=80,
-            stream=True,
-        )
-        # Add any assertions here to check the response
-        for chunk in response:
-            print(chunk)
-    except Exception as e:
-        pytest.fail(f"Error occurred: {e}")
-
-# test_completion_sagemaker_stream()
-
def test_completion_bedrock_titan():
    try:
        response = completion(
@@ -9,7 +9,7 @@ sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import litellm
-from litellm import completion, acompletion
+from litellm import completion, acompletion, AuthenticationError, InvalidRequestError

litellm.logging = False
litellm.set_verbose = False
@@ -187,6 +187,7 @@ def streaming_format_tests(idx, chunk):
        finished = True
    if "content" in chunk["choices"][0]["delta"]:
        extracted_chunk = chunk["choices"][0]["delta"]["content"]
+    print(f"extracted chunk: {extracted_chunk}")
    return extracted_chunk, finished

def test_completion_cohere_stream():
@@ -199,21 +200,120 @@ def test_completion_cohere_stream():
            },
        ]
        response = completion(
-            model="command-nightly", messages=messages, stream=True, max_tokens=50
+            model="command-nightly", messages=messages, stream=True, max_tokens=50,
        )
        complete_response = ""
        # Add any assertions here to check the response
+        has_finish_reason = False
        for idx, chunk in enumerate(response):
            chunk, finished = streaming_format_tests(idx, chunk)
+            has_finish_reason = finished
            if finished:
                break
            complete_response += chunk
+        if has_finish_reason is False:
+            raise Exception("Finish reason not in final chunk")
        if complete_response.strip() == "":
            raise Exception("Empty response received")
        print(f"completion_response: {complete_response}")
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")

+# test_completion_cohere_stream()
+
+def test_completion_cohere_stream_bad_key():
+    try:
+        api_key = "bad-key"
+        messages = [
+            {"role": "system", "content": "You are a helpful assistant."},
+            {
+                "role": "user",
+                "content": "how does a court case get to the Supreme Court?",
+            },
+        ]
+        response = completion(
+            model="command-nightly", messages=messages, stream=True, max_tokens=50, api_key=api_key
+        )
+        complete_response = ""
+        # Add any assertions here to check the response
+        has_finish_reason = False
+        for idx, chunk in enumerate(response):
+            chunk, finished = streaming_format_tests(idx, chunk)
+            has_finish_reason = finished
+            if finished:
+                break
+            complete_response += chunk
+        if has_finish_reason is False:
+            raise Exception("Finish reason not in final chunk")
+        if complete_response.strip() == "":
+            raise Exception("Empty response received")
+        print(f"completion_response: {complete_response}")
+    except AuthenticationError as e:
+        pass
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+# test_completion_cohere_stream_bad_key()
+
+# def test_completion_nlp_cloud():
+#     try:
+#         messages = [
+#             {"role": "system", "content": "You are a helpful assistant."},
+#             {
+#                 "role": "user",
+#                 "content": "how does a court case get to the Supreme Court?",
+#             },
+#         ]
+#         response = completion(model="dolphin", messages=messages, stream=True)
+#         complete_response = ""
+#         # Add any assertions here to check the response
+#         has_finish_reason = False
+#         for idx, chunk in enumerate(response):
+#             chunk, finished = streaming_format_tests(idx, chunk)
+#             has_finish_reason = finished
+#             complete_response += chunk
+#             if finished:
+#                 break
+#         if has_finish_reason is False:
+#             raise Exception("Finish reason not in final chunk")
+#         if complete_response.strip() == "":
+#             raise Exception("Empty response received")
+#         print(f"completion_response: {complete_response}")
+#     except Exception as e:
+#         pytest.fail(f"Error occurred: {e}")
+
+# test_completion_nlp_cloud()
+
+# def test_completion_nlp_cloud_bad_key():
+#     try:
+#         api_key = "bad-key"
+#         messages = [
+#             {"role": "system", "content": "You are a helpful assistant."},
+#             {
+#                 "role": "user",
+#                 "content": "how does a court case get to the Supreme Court?",
+#             },
+#         ]
+#         response = completion(model="dolphin", messages=messages, stream=True, api_key=api_key)
+#         complete_response = ""
+#         # Add any assertions here to check the response
+#         has_finish_reason = False
+#         for idx, chunk in enumerate(response):
+#             chunk, finished = streaming_format_tests(idx, chunk)
+#             has_finish_reason = finished
+#             complete_response += chunk
+#             if finished:
+#                 break
+#         if has_finish_reason is False:
+#             raise Exception("Finish reason not in final chunk")
+#         if complete_response.strip() == "":
+#             raise Exception("Empty response received")
+#         print(f"completion_response: {complete_response}")
+#     except Exception as e:
+#         pytest.fail(f"Error occurred: {e}")
+
+# test_completion_nlp_cloud_bad_key()
+
# def test_completion_hf_stream():
#     try:
#         messages = [
@@ -235,10 +335,41 @@ def test_completion_cohere_stream():
#         if complete_response.strip() == "":
#             raise Exception("Empty response received")
#         print(f"completion_response: {complete_response}")
+#     except InvalidRequestError as e:
+#         pass
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")

-# test_completion_hf_stream()
+# # test_completion_hf_stream()

+# def test_completion_hf_stream_bad_key():
+#     try:
+#         api_key = "bad-key"
+#         messages = [
+#             {
+#                 "content": "Hello! How are you today?",
+#                 "role": "user"
+#             },
+#         ]
+#         response = completion(
+#             model="huggingface/meta-llama/Llama-2-7b-chat-hf", messages=messages, api_base="https://a8l9e3ucxinyl3oj.us-east-1.aws.endpoints.huggingface.cloud", stream=True, max_tokens=1000, api_key=api_key
+#         )
+#         complete_response = ""
+#         # Add any assertions here to check the response
+#         for idx, chunk in enumerate(response):
+#             chunk, finished = streaming_format_tests(idx, chunk)
+#             if finished:
+#                 break
+#             complete_response += chunk
+#         if complete_response.strip() == "":
+#             raise Exception("Empty response received")
+#         print(f"completion_response: {complete_response}")
+#     except InvalidRequestError as e:
+#         pass
+#     except Exception as e:
+#         pytest.fail(f"Error occurred: {e}")
+
+# test_completion_hf_stream_bad_key()
+
def test_completion_claude_stream():
    try:
@@ -266,9 +397,202 @@ def test_completion_claude_stream():
        pytest.fail(f"Error occurred: {e}")
# test_completion_claude_stream()

+
+def test_completion_claude_stream_bad_key():
+    try:
+        api_key = "bad-key"
+        messages = [
+            {"role": "system", "content": "You are a helpful assistant."},
+            {
+                "role": "user",
+                "content": "how does a court case get to the Supreme Court?",
+            },
+        ]
+        response = completion(
+            model="claude-instant-1", messages=messages, stream=True, max_tokens=50, api_key=api_key
+        )
+        complete_response = ""
+        # Add any assertions here to check the response
+        for idx, chunk in enumerate(response):
+            chunk, finished = streaming_format_tests(idx, chunk)
+            if finished:
+                break
+            complete_response += chunk
+        if complete_response.strip() == "":
+            raise Exception("Empty response received")
+        print(f"completion_response: {complete_response}")
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+
+# test_completion_claude_stream_bad_key()
+
+def test_completion_replicate_stream():
+    try:
+        messages = [
+            {"role": "system", "content": "You are a helpful assistant."},
+            {
+                "role": "user",
+                "content": "how does a court case get to the Supreme Court?",
+            },
+        ]
+        response = completion(
+            model="replicate/meta/llama-2-70b-chat:02e509c789964a7ea8736978a43525956ef40397be9033abf9fd2badfe68c9e3", messages=messages, stream=True, max_tokens=50
+        )
+        complete_response = ""
+        has_finish_reason = False
+        # Add any assertions here to check the response
+        for idx, chunk in enumerate(response):
+            chunk, finished = streaming_format_tests(idx, chunk)
+            has_finish_reason = finished
+            if finished:
+                break
+            complete_response += chunk
+        if has_finish_reason is False:
+            raise Exception("finish reason not set for last chunk")
+        if complete_response.strip() == "":
+            raise Exception("Empty response received")
+        print(f"completion_response: {complete_response}")
+    except InvalidRequestError as e:
+        pass
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+# test_completion_replicate_stream()
+
+# def test_completion_vertexai_stream():
+#     try:
+#         import os
+#         os.environ["VERTEXAI_PROJECT"] = "pathrise-convert-1606954137718"
+#         os.environ["VERTEXAI_LOCATION"] = "us-central1"
+#         messages = [
+#             {"role": "system", "content": "You are a helpful assistant."},
+#             {
+#                 "role": "user",
+#                 "content": "how does a court case get to the Supreme Court?",
+#             },
+#         ]
+#         response = completion(
+#             model="vertex_ai/chat-bison", messages=messages, stream=True, max_tokens=50
+#         )
+#         complete_response = ""
+#         has_finish_reason = False
+#         # Add any assertions here to check the response
+#         for idx, chunk in enumerate(response):
+#             chunk, finished = streaming_format_tests(idx, chunk)
+#             has_finish_reason = finished
+#             if finished:
+#                 break
+#             complete_response += chunk
+#         if has_finish_reason is False:
+#             raise Exception("finish reason not set for last chunk")
+#         if complete_response.strip() == "":
+#             raise Exception("Empty response received")
+#         print(f"completion_response: {complete_response}")
+#     except InvalidRequestError as e:
+#         pass
+#     except Exception as e:
+#         pytest.fail(f"Error occurred: {e}")
+
+# test_completion_vertexai_stream()
+
+
+# def test_completion_vertexai_stream_bad_key():
+#     try:
+#         import os
+#         messages = [
+#             {"role": "system", "content": "You are a helpful assistant."},
+#             {
+#                 "role": "user",
+#                 "content": "how does a court case get to the Supreme Court?",
+#             },
+#         ]
+#         response = completion(
+#             model="vertex_ai/chat-bison", messages=messages, stream=True, max_tokens=50
+#         )
+#         complete_response = ""
+#         has_finish_reason = False
+#         # Add any assertions here to check the response
+#         for idx, chunk in enumerate(response):
+#             chunk, finished = streaming_format_tests(idx, chunk)
+#             has_finish_reason = finished
+#             if finished:
+#                 break
+#             complete_response += chunk
+#         if has_finish_reason is False:
+#             raise Exception("finish reason not set for last chunk")
+#         if complete_response.strip() == "":
+#             raise Exception("Empty response received")
+#         print(f"completion_response: {complete_response}")
+#     except InvalidRequestError as e:
+#         pass
+#     except Exception as e:
+#         pytest.fail(f"Error occurred: {e}")
+
+# test_completion_vertexai_stream_bad_key()
+
+def test_completion_replicate_stream():
+    try:
+        messages = [
+            {"role": "system", "content": "You are a helpful assistant."},
+            {
+                "role": "user",
+                "content": "how does a court case get to the Supreme Court?",
+            },
+        ]
+        response = completion(
+            model="replicate/meta/llama-2-70b-chat:02e509c789964a7ea8736978a43525956ef40397be9033abf9fd2badfe68c9e3", messages=messages, stream=True, max_tokens=50
+        )
+        complete_response = ""
+        has_finish_reason = False
+        # Add any assertions here to check the response
+        for idx, chunk in enumerate(response):
+            chunk, finished = streaming_format_tests(idx, chunk)
+            has_finish_reason = finished
+            if finished:
+                break
+            complete_response += chunk
+        if has_finish_reason is False:
+            raise Exception("finish reason not set for last chunk")
+        if complete_response.strip() == "":
+            raise Exception("Empty response received")
+        print(f"completion_response: {complete_response}")
+    except InvalidRequestError as e:
+        pass
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+def test_completion_replicate_stream_bad_key():
+    try:
+        api_key = "bad-key"
+        messages = [
+            {"role": "system", "content": "You are a helpful assistant."},
+            {
+                "role": "user",
+                "content": "how does a court case get to the Supreme Court?",
+            },
+        ]
+        response = completion(
+            model="replicate/meta/llama-2-70b-chat:02e509c789964a7ea8736978a43525956ef40397be9033abf9fd2badfe68c9e3", messages=messages, stream=True, max_tokens=50, api_key=api_key
+        )
+        complete_response = ""
+        # Add any assertions here to check the response
+        for idx, chunk in enumerate(response):
+            chunk, finished = streaming_format_tests(idx, chunk)
+            if finished:
+                break
+            complete_response += chunk
+        if complete_response.strip() == "":
+            raise Exception("Empty response received")
+        print(f"completion_response: {complete_response}")
+    except InvalidRequestError as e:
+        pass
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+# test_completion_replicate_stream_bad_key()
+
def test_completion_bedrock_ai21_stream():
    try:
-        litellm.set_verbose = False
        response = completion(
            model="bedrock/amazon.titan-tg1-large",
            messages=[{"role": "user", "content": "Be as verbose as possible and give as many details as possible, how does a court case get to the Supreme Court?"}],
@@ -277,20 +601,82 @@ def test_completion_bedrock_ai21_stream():
            stream=True,
        )
        complete_response = ""
+        has_finish_reason = False
        # Add any assertions here to check the response
-        print(response)
        for idx, chunk in enumerate(response):
            chunk, finished = streaming_format_tests(idx, chunk)
+            has_finish_reason = finished
+            complete_response += chunk
            if finished:
                break
-            complete_response += chunk
+        if has_finish_reason is False:
+            raise Exception("finish reason not set for last chunk")
        if complete_response.strip() == "":
            raise Exception("Empty response received")
+        print(f"completion_response: {complete_response}")
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")

+# test_completion_bedrock_ai21_stream()

-# test_completion_cohere_stream()
+def test_completion_bedrock_ai21_stream_bad_key():
+    try:
+        response = completion(
+            model="bedrock/amazon.titan-tg1-large",
+            messages=[{"role": "user", "content": "Be as verbose as possible and give as many details as possible, how does a court case get to the Supreme Court?"}],
+            temperature=1,
+            max_tokens=4096,
+            stream=True,
+        )
+        complete_response = ""
+        has_finish_reason = False
+        # Add any assertions here to check the response
+        for idx, chunk in enumerate(response):
+            chunk, finished = streaming_format_tests(idx, chunk)
+            has_finish_reason = finished
+            if finished:
+                break
+            complete_response += chunk
+        if has_finish_reason is False:
+            raise Exception("finish reason not set for last chunk")
+        if complete_response.strip() == "":
+            raise Exception("Empty response received")
+        print(f"completion_response: {complete_response}")
+    except InvalidRequestError as e:
+        pass
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+# test_completion_bedrock_ai21_stream_bad_key()
+
+def test_completion_sagemaker_stream():
+    try:
+        response = completion(
+            model="sagemaker/jumpstart-dft-meta-textgeneration-llama-2-7b",
+            messages=messages,
+            temperature=0.2,
+            max_tokens=80,
+            stream=True,
+        )
+        complete_response = ""
+        has_finish_reason = False
+        # Add any assertions here to check the response
+        for idx, chunk in enumerate(response):
+            chunk, finished = streaming_format_tests(idx, chunk)
+            has_finish_reason = finished
+            if finished:
+                break
+            complete_response += chunk
+        if has_finish_reason is False:
+            raise Exception("finish reason not set for last chunk")
+        if complete_response.strip() == "":
+            raise Exception("Empty response received")
+    except InvalidRequestError as e:
+        pass
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+test_completion_sagemaker_stream()
+
# test on openai completion call
def test_openai_text_completion_call():
@@ -314,7 +700,33 @@ def test_openai_text_completion_call():
def ai21_completion_call():
    try:
        response = completion(
-            model="j2-ultra", messages=messages, stream=True, logger_fn=logger_fn
+            model="j2-ultra", messages=messages, stream=True
+        )
+        print(f"response: {response}")
+        has_finished = False
+        complete_response = ""
+        start_time = time.time()
+        for idx, chunk in enumerate(response):
+            chunk, finished = streaming_format_tests(idx, chunk)
+            has_finished = finished
+            complete_response += chunk
+            if finished:
+                break
+        if has_finished is False:
+            raise Exception("finished reason missing from final chunk")
+        if complete_response.strip() == "":
+            raise Exception("Empty response received")
+        print(f"completion_response: {complete_response}")
+    except:
+        pytest.fail(f"error occurred: {traceback.format_exc()}")
+
+# ai21_completion_call()
+
+def ai21_completion_call_bad_key():
+    try:
+        api_key = "bad-key"
+        response = completion(
+            model="j2-ultra", messages=messages, stream=True, api_key=api_key
        )
        print(f"response: {response}")
        complete_response = ""
@@ -327,10 +739,64 @@ def ai21_completion_call():
        if complete_response.strip() == "":
            raise Exception("Empty response received")
        print(f"completion_response: {complete_response}")
+    except InvalidRequestError as e:
+        pass
    except:
        pytest.fail(f"error occurred: {traceback.format_exc()}")

-# ai21_completion_call()
+# ai21_completion_call_bad_key()

+def test_completion_aleph_alpha():
+    try:
+        response = completion(
+            model="luminous-base", messages=messages, stream=True
+        )
+        # Add any assertions here to check the response
+        has_finished = False
+        complete_response = ""
+        start_time = time.time()
+        for idx, chunk in enumerate(response):
+            chunk, finished = streaming_format_tests(idx, chunk)
+            has_finished = finished
+            complete_response += chunk
+            if finished:
+                break
+        if has_finished is False:
+            raise Exception("finished reason missing from final chunk")
+        if complete_response.strip() == "":
+            raise Exception("Empty response received")
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+# test_completion_aleph_alpha()
+
+# def test_completion_aleph_alpha_bad_key():
+#     try:
+#         api_key = "bad-key"
+#         response = completion(
+#             model="luminous-base", messages=messages, stream=True, api_key=api_key
+#         )
+#         # Add any assertions here to check the response
+#         has_finished = False
+#         complete_response = ""
+#         start_time = time.time()
+#         for idx, chunk in enumerate(response):
+#             chunk, finished = streaming_format_tests(idx, chunk)
+#             has_finished = finished
+#             complete_response += chunk
+#             if finished:
+#                 break
+#         if has_finished is False:
+#             raise Exception("finished reason missing from final chunk")
+#         if complete_response.strip() == "":
+#             raise Exception("Empty response received")
+#     except InvalidRequestError as e:
+#         pass
+#     except Exception as e:
+#         pytest.fail(f"Error occurred: {e}")
+
+# test_completion_aleph_alpha_bad_key()
+
# test on openai completion call
def test_openai_chat_completion_call():
    try:
@@ -366,11 +832,15 @@ def test_together_ai_completion_call_starcoder():
        )
        complete_response = ""
        print(f"returned response object: {response}")
+        has_finish_reason = False
        for idx, chunk in enumerate(response):
            chunk, finished = streaming_format_tests(idx, chunk)
+            has_finish_reason = finished
            if finished:
                break
            complete_response += chunk
+        if has_finish_reason is False:
+            raise Exception("Finish reason not set for last chunk")
        if complete_response == "":
            raise Exception("Empty response received")
        print(f"complete response: {complete_response}")
@@ -378,6 +848,38 @@ def test_together_ai_completion_call_starcoder():
        print(f"error occurred: {traceback.format_exc()}")
        pass

+# test_together_ai_completion_call_starcoder()
+
+def test_together_ai_completion_call_starcoder_bad_key():
+    try:
+        api_key = "bad-key"
+        start_time = time.time()
+        response = completion(
+            model="together_ai/bigcode/starcoder",
+            messages=messages,
+            stream=True,
+            api_key=api_key
+        )
+        complete_response = ""
+        has_finish_reason = False
+        for idx, chunk in enumerate(response):
+            chunk, finished = streaming_format_tests(idx, chunk)
+            has_finish_reason = finished
+            if finished:
+                break
+            complete_response += chunk
+        if has_finish_reason is False:
+            raise Exception("Finish reason not set for last chunk")
+        if complete_response == "":
+            raise Exception("Empty response received")
+        print(f"complete response: {complete_response}")
+    except InvalidRequestError as e:
+        pass
+    except:
+        print(f"error occurred: {traceback.format_exc()}")
+        pass
+
+# test_together_ai_completion_call_starcoder_bad_key()
+
#### Test Function calling + streaming ####

def test_completion_openai_with_functions():
270 litellm/utils.py
@@ -2,6 +2,7 @@ import sys
import dotenv, json, traceback, threading
import subprocess, os
import litellm, openai
+import itertools
import random, uuid, requests
import datetime, time
import tiktoken
@@ -1915,7 +1916,6 @@ def exception_type(
):
    global user_logger_fn, liteDebuggerClient
    exception_mapping_worked = False
-
    if litellm.set_verbose == True:
        litellm.error_logs['EXCEPTION'] = original_exception
        litellm.error_logs['KWARGS'] = completion_kwargs
@@ -1970,7 +1970,7 @@ def exception_type(
            exception_type = type(original_exception).__name__
        else:
            exception_type = ""
-        if "claude" in model:  # one of the anthropics
+        if custom_llm_provider == "anthropic":  # one of the anthropics
            if hasattr(original_exception, "message"):
                if "prompt is too long" in original_exception.message:
                    exception_mapping_worked = True
@@ -1979,6 +1979,13 @@ def exception_type(
                        model=model,
                        llm_provider="anthropic"
                    )
+                if "Invalid API Key" in original_exception.message:
+                    exception_mapping_worked = True
+                    raise AuthenticationError(
+                        message=original_exception.message,
+                        model=model,
+                        llm_provider="anthropic"
+                    )
            if hasattr(original_exception, "status_code"):
                print_verbose(f"status_code: {original_exception.status_code}")
                if original_exception.status_code == 401:
@@ -2031,7 +2038,7 @@ def exception_type(
                        llm_provider="anthropic",
                        model=model
                    )
-        elif "replicate" in model:
+        elif custom_llm_provider == "replicate":
            if "Incorrect authentication token" in error_str:
                exception_mapping_worked = True
                raise AuthenticationError(
@@ -2068,7 +2075,7 @@ def exception_type(
                        llm_provider="replicate",
                        model=model
                    )
-                elif original_exception.status_code == 400:
+                elif original_exception.status_code == 400 or original_exception.status_code == 422:
                    exception_mapping_worked = True
                    raise InvalidRequestError(
                        message=f"ReplicateException - {original_exception.message}",
@@ -2110,7 +2117,31 @@ def exception_type(
                        llm_provider="replicate",
                        model=model
                    )
-        elif model in litellm.cohere_models or custom_llm_provider == "cohere":  # Cohere
+        elif custom_llm_provider == "bedrock":
+            if "Unable to locate credentials" in error_str:
+                exception_mapping_worked = True
+                raise InvalidRequestError(
+                    message=f"BedrockException - {error_str}",
+                    model=model,
+                    llm_provider="bedrock"
+                )
+        elif custom_llm_provider == "sagemaker":
+            if "Unable to locate credentials" in error_str:
+                exception_mapping_worked = True
+                raise InvalidRequestError(
+                    message=f"SagemakerException - {error_str}",
+                    model=model,
+                    llm_provider="sagemaker"
+                )
+        elif custom_llm_provider == "vertex_ai":
+            if "Vertex AI API has not been used in project" in error_str or "Unable to find your project" in error_str:
+                exception_mapping_worked = True
+                raise InvalidRequestError(
+                    message=f"VertexAIException - {error_str}",
+                    model=model,
+                    llm_provider="vertex_ai"
+                )
+        elif custom_llm_provider == "cohere":  # Cohere
            if (
                "invalid api token" in error_str
                or "No API key provided." in error_str
@@ -2184,6 +2215,13 @@ def exception_type(
                    model=model,
                    llm_provider="huggingface"
                )
+            elif "A valid user token is required" in error_str:
+                exception_mapping_worked = True
+                raise InvalidRequestError(
+                    message=error_str,
+                    llm_provider="huggingface",
+                    model=model
+                )
            if hasattr(original_exception, "status_code"):
                if original_exception.status_code == 401:
                    exception_mapping_worked = True
@@ -2221,6 +2259,8 @@ def exception_type(
                    llm_provider="huggingface",
                    model=model
                )
+            exception_mapping_worked = True
+            raise APIError(status_code=500, message=error_str, model=model, llm_provider=custom_llm_provider)
        elif custom_llm_provider == "ai21":
            if hasattr(original_exception, "message"):
                if "Prompt has too many tokens" in original_exception.message:
@@ -2230,6 +2270,13 @@ def exception_type(
                        model=model,
                        llm_provider="ai21"
                    )
+                if "Bad or missing API token." in original_exception.message:
+                    exception_mapping_worked = True
+                    raise InvalidRequestError(
+                        message=f"AI21Exception - {original_exception.message}",
+                        model=model,
+                        llm_provider="ai21"
+                    )
            if hasattr(original_exception, "status_code"):
                if original_exception.status_code == 401:
                    exception_mapping_worked = True
@@ -2266,7 +2313,7 @@ def exception_type(
                        llm_provider="ai21",
                        model=model
                    )
-        elif model in litellm.nlp_cloud_models or custom_llm_provider == "nlp_cloud":
+        elif custom_llm_provider == "nlp_cloud":
            if "detail" in error_str:
                if "Input text length should not exceed" in error_str:
                    exception_mapping_worked = True
@@ -2342,6 +2389,7 @@ def exception_type(
                    model=model
                )
        elif custom_llm_provider == "together_ai":
+            import json
            error_response = json.loads(error_str)
            if "error" in error_response and "`inputs` tokens + `max_new_tokens` must be <=" in error_response["error"]:
                exception_mapping_worked = True
@@ -2364,6 +2412,13 @@ def exception_type(
                    model=model,
                    llm_provider="together_ai"
                )
+            elif "error" in error_response and "API key doesn't match expected format." in error_response["error"]:
+                exception_mapping_worked = True
+                raise InvalidRequestError(
+                    message=f"TogetherAIException - {error_response['error']}",
+                    model=model,
+                    llm_provider="together_ai"
+                )
            elif "error_type" in error_response and error_response["error_type"] == "validation":
                exception_mapping_worked = True
                raise InvalidRequestError(
@@ -2393,7 +2448,7 @@ def exception_type(
                    llm_provider="together_ai",
                    model=model
                )
-        elif model in litellm.aleph_alpha_models:
+        elif custom_llm_provider == "aleph_alpha":
            if "This is longer than the model's maximum context length" in error_str:
                exception_mapping_worked = True
                raise ContextWindowExceededError(
@@ -2401,6 +2456,13 @@ def exception_type(
                    llm_provider="aleph_alpha",
                    model=model
                )
+            elif "InvalidToken" in error_str or "No token provided" in error_str:
+                exception_mapping_worked = True
+                raise InvalidRequestError(
+                    message=f"AlephAlphaException - {original_exception.message}",
+                    llm_provider="aleph_alpha",
+                    model=model
+                )
            elif hasattr(original_exception, "status_code"):
                print(f"status code: {original_exception.status_code}")
                if original_exception.status_code == 401:
@@ -2445,7 +2507,8 @@ def exception_type(
        elif custom_llm_provider == "ollama":
            if "no attribute 'async_get_ollama_response_stream" in error_str:
                raise ImportError("Import error - trying to use async for ollama. import async_generator failed. Try 'pip install async_generator'")
-        raise original_exception
+        exception_mapping_worked = True
+        raise APIError(status_code=500, message=str(original_exception), llm_provider=custom_llm_provider, model=model)
    except Exception as e:
        # LOGGING
        exception_logging(
@@ -2563,6 +2626,7 @@ class CustomStreamWrapper:
        self.logging_obj = logging_obj
        self.completion_stream = completion_stream
        self.sent_first_chunk = False
+        self.sent_last_chunk = False
        if self.logging_obj:
            # Log the type of the received item
            self.logging_obj.post_call(str(type(completion_stream)))
@@ -2579,41 +2643,71 @@ class CustomStreamWrapper:

    def handle_anthropic_chunk(self, chunk):
        str_line = chunk.decode("utf-8")  # Convert bytes to string
+        print(f"str_line: {str_line}")
+        text = ""
+        is_finished = False
+        finish_reason = None
        if str_line.startswith("data:"):
            data_json = json.loads(str_line[5:])
-            return data_json.get("completion", "")
-        return ""
+            text = data_json.get("completion", "")
+            if data_json.get("stop_reason", None):
+                is_finished = True
+                finish_reason = data_json["stop_reason"]
+            return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}
+        elif "error" in str_line:
+            raise ValueError(f"Unable to parse response. Original response: {str_line}")
+        else:
+            return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}

    def handle_together_ai_chunk(self, chunk):
        chunk = chunk.decode("utf-8")
+        text = ""
+        is_finished = False
+        finish_reason = None
+        if "text" in chunk:
            text_index = chunk.find('"text":"')  # this checks if text: exists
            text_start = text_index + len('"text":"')
            text_end = chunk.find('"}', text_start)
            if text_index != -1 and text_end != -1:
                extracted_text = chunk[text_start:text_end]
-                return extracted_text
+                text = extracted_text
+            return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}
+        elif "[DONE]" in chunk:
+            return {"text": text, "is_finished": True, "finish_reason": "stop"}
+        elif "error" in chunk:
+            raise ValueError(chunk)
        else:
-            return ""
+            return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}

    def handle_huggingface_chunk(self, chunk):
        chunk = chunk.decode("utf-8")
+        text = ""
+        is_finished = False
+        finish_reason = ""
        if chunk.startswith("data:"):
            data_json = json.loads(chunk[5:])
+            print(f"data json: {data_json}")
            if "token" in data_json and "text" in data_json["token"]:
                text = data_json["token"]["text"]
                if "meta-llama/Llama-2" in self.model:  # clean eos tokens like </s> from the returned output text
                    if any(token in text for token in llama_2_special_tokens):
                        text = text.replace("<s>", "").replace("</s>", "")
-            return text
-        else:
-            return ""
-        return ""
+            if data_json.get("details", False) and data_json["details"].get("finish_reason", False):
+                is_finished = True
+                finish_reason = data_json["details"]["finish_reason"]
+            return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}
+        elif "error" in chunk:
+            raise ValueError(chunk)
+        return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}

-    def handle_ai21_chunk(self, chunk):
+    def handle_ai21_chunk(self, chunk):  # fake streaming
        chunk = chunk.decode("utf-8")
        data_json = json.loads(chunk)
        try:
-            return data_json["completions"][0]["data"]["text"]
+            text = data_json["completions"][0]["data"]["text"]
+            is_finished = True
+            finish_reason = "stop"
+            return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}
        except:
            raise ValueError(f"Unable to parse response. Original response: {chunk}")

@@ -2621,8 +2715,10 @@ class CustomStreamWrapper:
        chunk = chunk.decode("utf-8")
        data_json = json.loads(chunk)
        try:
-            print(f"data json: {data_json}")
-            return data_json["generated_text"]
+            text = data_json["generated_text"]
+            is_finished = True
+            finish_reason = "stop"
+            return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}
        except:
            raise ValueError(f"Unable to parse response. Original response: {chunk}")

@@ -2630,7 +2726,10 @@ class CustomStreamWrapper:
        chunk = chunk.decode("utf-8")
        data_json = json.loads(chunk)
        try:
-            return data_json["completions"][0]["completion"]
+            text = data_json["completions"][0]["completion"]
+            is_finished = True
+            finish_reason = "stop"
+            return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}
        except:
            raise ValueError(f"Unable to parse response. Original response: {chunk}")

@@ -2638,7 +2737,35 @@ class CustomStreamWrapper:
        chunk = chunk.decode("utf-8")
        data_json = json.loads(chunk)
        try:
-            return data_json["text"]
+            text = ""
+            is_finished = False
+            finish_reason = ""
+            if "text" in data_json:
+                text = data_json["text"]
+            elif "is_finished" in data_json:
+                is_finished = data_json["is_finished"]
+                finish_reason = data_json["finish_reason"]
+            else:
+                raise Exception(data_json)
+            return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}
+        except:
+            raise ValueError(f"Unable to parse response. Original response: {chunk}")
+
+    def handle_replicate_chunk(self, chunk):
+        print(f"chunk: {chunk}")
+        try:
+            text = ""
+            is_finished = False
+            finish_reason = ""
+            if "output" in chunk:
+                text = chunk['output']
+            if "status" in chunk:
+                if chunk["status"] == "succeeded":
+                    is_finished = True
+                    finish_reason = "stop"
+            elif chunk.get("error", None):
+                raise Exception(chunk["error"])
+            return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}
        except:
            raise ValueError(f"Unable to parse response. Original response: {chunk}")

@@ -2683,13 +2810,21 @@ class CustomStreamWrapper:
            traceback.print_exc()
            return ""

-    def handle_bedrock_stream(self):
-        if self.completion_stream:
-            event = next(self.completion_stream)
-            chunk = event.get('chunk')
+    def handle_bedrock_stream(self, chunk):
+        chunk = chunk.get('chunk')
        if chunk:
            chunk_data = json.loads(chunk.get('bytes').decode())
-            return chunk_data['outputText']
+            text = ""
+            is_finished = False
+            finish_reason = ""
+            if "outputText" in chunk_data:
+                text = chunk_data['outputText']
+            if chunk_data.get("completionReason", None):
+                is_finished = True
+                finish_reason = chunk_data["completionReason"]
+            elif chunk.get("error", None):
+                raise Exception(chunk["error"])
+            return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}
        return ""

    ## needs to handle the empty string case (even starting chunk can be an empty string)
@@ -2701,49 +2836,94 @@ class CustomStreamWrapper:
             completion_obj = {"content": ""}
             if self.custom_llm_provider and self.custom_llm_provider == "anthropic":
                 chunk = next(self.completion_stream)
-                completion_obj["content"] = self.handle_anthropic_chunk(chunk)
+                response_obj = self.handle_anthropic_chunk(chunk)
+                completion_obj["content"] = response_obj["text"]
+                if response_obj["is_finished"]:
+                    model_response.choices[0].finish_reason = response_obj["finish_reason"]
             elif self.model == "replicate" or self.custom_llm_provider == "replicate":
                 chunk = next(self.completion_stream)
-                completion_obj["content"] = chunk
+                response_obj = self.handle_replicate_chunk(chunk)
+                completion_obj["content"] = response_obj["text"]
+                if response_obj["is_finished"]:
+                    model_response.choices[0].finish_reason = response_obj["finish_reason"]
             elif (
                 self.custom_llm_provider and self.custom_llm_provider == "together_ai"):
                 chunk = next(self.completion_stream)
-                text_data = self.handle_together_ai_chunk(chunk)
-                if text_data == "":
-                    return self.__next__()
-                completion_obj["content"] = text_data
+                response_obj = self.handle_together_ai_chunk(chunk)
+                completion_obj["content"] = response_obj["text"]
+                if response_obj["is_finished"]:
+                    model_response.choices[0].finish_reason = response_obj["finish_reason"]
             elif self.custom_llm_provider and self.custom_llm_provider == "huggingface":
                 chunk = next(self.completion_stream)
-                completion_obj["content"] = self.handle_huggingface_chunk(chunk)
+                response_obj = self.handle_huggingface_chunk(chunk)
+                completion_obj["content"] = response_obj["text"]
+                if response_obj["is_finished"]:
+                    model_response.choices[0].finish_reason = response_obj["finish_reason"]
             elif self.custom_llm_provider and self.custom_llm_provider == "baseten": # baseten doesn't provide streaming
                 chunk = next(self.completion_stream)
                 completion_obj["content"] = self.handle_baseten_chunk(chunk)
             elif self.custom_llm_provider and self.custom_llm_provider == "ai21": #ai21 doesn't provide streaming
                 chunk = next(self.completion_stream)
-                completion_obj["content"] = self.handle_ai21_chunk(chunk)
+                response_obj = self.handle_ai21_chunk(chunk)
+                completion_obj["content"] = response_obj["text"]
+                if response_obj["is_finished"]:
+                    model_response.choices[0].finish_reason = response_obj["finish_reason"]
             elif self.custom_llm_provider and self.custom_llm_provider == "vllm":
                 chunk = next(self.completion_stream)
                 completion_obj["content"] = chunk[0].outputs[0].text
-            elif self.custom_llm_provider and self.custom_llm_provider == "aleph-alpha": #aleph alpha doesn't provide streaming
+            elif self.custom_llm_provider and self.custom_llm_provider == "aleph_alpha": #aleph alpha doesn't provide streaming
                 chunk = next(self.completion_stream)
-                completion_obj["content"] = self.handle_aleph_alpha_chunk(chunk)
+                response_obj = self.handle_aleph_alpha_chunk(chunk)
+                completion_obj["content"] = response_obj["text"]
+                if response_obj["is_finished"]:
+                    model_response.choices[0].finish_reason = response_obj["finish_reason"]
             elif self.custom_llm_provider and self.custom_llm_provider == "text-completion-openai":
                 chunk = next(self.completion_stream)
                 completion_obj["content"] = self.handle_openai_text_completion_chunk(chunk)
             elif self.model in litellm.nlp_cloud_models or self.custom_llm_provider == "nlp_cloud":
-                chunk = next(self.completion_stream)
-                completion_obj["content"] = self.handle_nlp_cloud_chunk(chunk)
-            elif self.model in (litellm.vertex_chat_models + litellm.vertex_code_chat_models + litellm.vertex_text_models + litellm.vertex_code_text_models):
-                chunk = next(self.completion_stream)
-                completion_obj["content"] = str(chunk)
+                try:
+                    chunk = next(self.completion_stream)
+                    response_obj = self.handle_nlp_cloud_chunk(chunk)
+                    completion_obj["content"] = response_obj["text"]
+                    if response_obj["is_finished"]:
+                        model_response.choices[0].finish_reason = response_obj["finish_reason"]
+                except Exception as e:
+                    if self.sent_last_chunk:
+                        raise e
+                    else:
+                        if self.sent_first_chunk is False:
+                            raise Exception("An unknown error occurred with the stream")
+                        model_response.choices[0].finish_reason = "stop"
+                        self.sent_last_chunk = True
+            elif self.custom_llm_provider and self.custom_llm_provider == "vertex_ai":
+                try:
+                    chunk = next(self.completion_stream)
+                    completion_obj["content"] = str(chunk)
+                except StopIteration as e:
+                    if self.sent_last_chunk:
+                        raise e
+                    else:
+                        model_response.choices[0].finish_reason = "stop"
+                        self.sent_last_chunk = True
             elif self.custom_llm_provider == "cohere":
                 chunk = next(self.completion_stream)
-                completion_obj["content"] = self.handle_cohere_chunk(chunk)
+                response_obj = self.handle_cohere_chunk(chunk)
+                completion_obj["content"] = response_obj["text"]
+                if response_obj["is_finished"]:
+                    model_response.choices[0].finish_reason = response_obj["finish_reason"]
             elif self.custom_llm_provider == "bedrock":
-                completion_obj["content"] = self.handle_bedrock_stream()
+                chunk = next(self.completion_stream)
+                response_obj = self.handle_bedrock_stream(chunk)
+                completion_obj["content"] = response_obj["text"]
+                if response_obj["is_finished"]:
+                    model_response.choices[0].finish_reason = response_obj["finish_reason"]
             elif self.custom_llm_provider == "sagemaker":
                 if len(self.completion_stream)==0:
-                    raise StopIteration
+                    if self.sent_last_chunk:
+                        raise StopIteration
+                    else:
+                        model_response.choices[0].finish_reason = "stop"
+                        self.sent_last_chunk = True
                 chunk_size = 30
                 new_chunk = self.completion_stream[:chunk_size]
                 completion_obj["content"] = new_chunk
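All of the provider branches above now reduce to the same four steps. A stripped-down sketch of that shared pattern (standalone function with simplified names — not the actual CustomStreamWrapper code):

def consume_stream_chunk(handler, completion_stream, model_response):
    # 1. pull the next raw chunk, 2. parse it with the provider-specific handler,
    # 3. surface the text delta, 4. propagate finish_reason once the stream ends
    chunk = next(completion_stream)
    response_obj = handler(chunk)  # {"text": ..., "is_finished": ..., "finish_reason": ...}
    if response_obj["is_finished"]:
        model_response.choices[0].finish_reason = response_obj["finish_reason"]
    return response_obj["text"]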
@@ -2765,11 +2945,13 @@ class CustomStreamWrapper:
                     self.sent_first_chunk = True
                 model_response.choices[0].delta = Delta(**completion_obj)
                 return model_response
+            elif model_response.choices[0].finish_reason:
+                return model_response
         except StopIteration:
             raise StopIteration
         except Exception as e:
-            model_response.choices[0].finish_reason = "stop"
-            return model_response
+            e.message = str(e)
+            return exception_type(model=self.model, custom_llm_provider=self.custom_llm_provider, original_exception=e)
 
     async def __anext__(self):
         try:
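Because the generic except branch now routes through exception_type() instead of silently finishing with "stop", callers iterating a stream should see provider failures as mapped, OpenAI-style exceptions. A hedged usage sketch (the model name is illustrative; the exact exception classes raised depend on the provider and litellm version):

import litellm

try:
    response = litellm.completion(
        model="replicate/llama-2-70b-chat",  # illustrative model name
        messages=[{"role": "user", "content": "Hello"}],
        stream=True,
    )
    for chunk in response:
        print(chunk)
except Exception as e:
    # errors raised mid-stream arrive here already mapped by exception_type(),
    # so callers can branch on the exception class instead of a bare "stop"
    print(f"streaming call failed: {type(e).__name__}: {e}")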
@@ -2796,7 +2978,6 @@ def read_config_args(config_path) -> dict:
         # read keys/ values from config file and return them
         return config
     except Exception as e:
-        print("An error occurred while reading config:", str(e))
         raise e
 
 ########## experimental completion variants ############################
@@ -2899,7 +3080,6 @@ def get_model_split_test(models, completion_call_id):
     try:
         # make the api call
         last_fetched_at = time.time()
-        print(f"last_fetched_at: {last_fetched_at}")
         response = requests.post(
             #http://api.litellm.ai
             url="http://api.litellm.ai/get_model_split_test", # get the updated dict from table or update the table with the dict
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.738"
+version = "0.1.739"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"