fix exception mapping for streaming

Krrish Dholakia 2023-09-23 15:04:34 -07:00
parent f984e5f380
commit 889679a0dd
8 changed files with 766 additions and 100 deletions

View file

@ -77,14 +77,16 @@ def handle_prediction_response_streaming(prediction_url, api_token, print_verbos
}
status = ""
while True and (status not in ["succeeded", "failed", "canceled"]):
-time.sleep(0.0001)
time.sleep(0.0001) # prevent being rate limited by replicate
response = requests.get(prediction_url, headers=headers)
if response.status_code == 200:
response_data = response.json()
status = response_data['status']
print(f"response data: {response_data}")
if "output" in response_data:
output_string = "".join(response_data['output'])
new_output = output_string[len(previous_output):]
-yield new_output
yield {"output": new_output, "status": status}
previous_output = output_string
status = response_data['status']
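With this change the Replicate polling generator yields a dict carrying both the incremental text and the prediction status, rather than a bare string, which is what lets the stream wrapper further down detect when the prediction has finished. A self-contained sketch of consuming chunks of that shape (the fake generator below is illustrative, not part of this commit):

    # Mimic the {"output", "status"} chunks the Replicate poller now yields
    # and stop once a terminal status is reported.
    def fake_replicate_stream():
        yield {"output": "The Supreme Court ", "status": "processing"}
        yield {"output": "hears appeals from lower courts.", "status": "succeeded"}

    text = ""
    for piece in fake_replicate_stream():
        text += piece["output"]
        if piece["status"] in ("succeeded", "failed", "canceled"):
            break
    print(text)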

View file

@ -485,11 +485,11 @@ def completion(
# Setting the relevant API KEY for replicate, replicate defaults to using os.environ.get("REPLICATE_API_TOKEN")
replicate_key = None
replicate_key = (
-get_secret("REPLICATE_API_KEY")
-or get_secret("REPLICATE_API_TOKEN")
-or api_key
api_key
or litellm.replicate_key
or litellm.api_key
or get_secret("REPLICATE_API_KEY")
or get_secret("REPLICATE_API_TOKEN")
)
model_response = replicate.completion(
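Reordering the chain means an api_key passed directly to completion() now takes precedence over module-level keys, with the environment variables checked last. A standalone sketch of the same precedence rule (function and parameter names here are illustrative, not litellm's API):

    import os

    def resolve_replicate_key(api_key=None, replicate_key=None, litellm_api_key=None):
        # First truthy value wins: explicit argument, then module-level keys,
        # then environment variables as the fallback.
        return (
            api_key
            or replicate_key
            or litellm_api_key
            or os.environ.get("REPLICATE_API_KEY")
            or os.environ.get("REPLICATE_API_TOKEN")
        )

    print(resolve_replicate_key(api_key="r8_example-key"))  # -> "r8_example-key"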
@ -575,7 +575,7 @@ def completion(
if "stream" in optional_params and optional_params["stream"] == True: if "stream" in optional_params and optional_params["stream"] == True:
# don't try to access stream object, # don't try to access stream object,
response = CustomStreamWrapper(model_response, model, custom_llm_provider="aleph-alpha", logging_obj=logging) response = CustomStreamWrapper(model_response, model, custom_llm_provider="aleph_alpha", logging_obj=logging)
return response return response
response = model_response response = model_response
elif model in litellm.openrouter_models or custom_llm_provider == "openrouter": elif model in litellm.openrouter_models or custom_llm_provider == "openrouter":
@ -769,7 +769,7 @@ def completion(
if stream:
model_response = chat.send_message_streaming(prompt, **optional_params)
response = CustomStreamWrapper(
-model_response, model, custom_llm_provider="vertexai", logging_obj=logging
model_response, model, custom_llm_provider="vertex_ai", logging_obj=logging
)
return response

View file

@ -643,24 +643,6 @@ def test_completion_sagemaker():
# test_completion_sagemaker()
-def test_completion_sagemaker_stream():
-litellm.set_verbose = False
-try:
-response = completion(
-model="sagemaker/jumpstart-dft-meta-textgeneration-llama-2-7b",
-messages=messages,
-temperature=0.2,
-max_tokens=80,
-stream=True,
-)
-# Add any assertions here to check the response
-for chunk in response:
-print(chunk)
-except Exception as e:
-pytest.fail(f"Error occurred: {e}")
-# test_completion_sagemaker_stream()
def test_completion_bedrock_titan():
try:
response = completion(

View file

@ -9,7 +9,7 @@ sys.path.insert(
0, os.path.abspath("../..") 0, os.path.abspath("../..")
) # Adds the parent directory to the system path ) # Adds the parent directory to the system path
import litellm import litellm
from litellm import completion, acompletion from litellm import completion, acompletion, AuthenticationError, InvalidRequestError
litellm.logging = False litellm.logging = False
litellm.set_verbose = False litellm.set_verbose = False
@ -187,6 +187,7 @@ def streaming_format_tests(idx, chunk):
finished = True
if "content" in chunk["choices"][0]["delta"]:
extracted_chunk = chunk["choices"][0]["delta"]["content"]
print(f"extracted chunk: {extracted_chunk}")
return extracted_chunk, finished
def test_completion_cohere_stream():
@ -199,21 +200,120 @@ def test_completion_cohere_stream():
},
]
response = completion(
-model="command-nightly", messages=messages, stream=True, max_tokens=50
model="command-nightly", messages=messages, stream=True, max_tokens=50,
)
complete_response = ""
# Add any assertions here to check the response
has_finish_reason = False
for idx, chunk in enumerate(response):
chunk, finished = streaming_format_tests(idx, chunk)
has_finish_reason = finished
if finished:
break
complete_response += chunk
if has_finish_reason is False:
raise Exception("Finish reason not in final chunk")
if complete_response.strip() == "":
raise Exception("Empty response received")
print(f"completion_response: {complete_response}")
except Exception as e:
pytest.fail(f"Error occurred: {e}")
# test_completion_cohere_stream()
def test_completion_cohere_stream_bad_key():
try:
api_key = "bad-key"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{
"role": "user",
"content": "how does a court case get to the Supreme Court?",
},
]
response = completion(
model="command-nightly", messages=messages, stream=True, max_tokens=50, api_key=api_key
)
complete_response = ""
# Add any assertions here to check the response
has_finish_reason = False
for idx, chunk in enumerate(response):
chunk, finished = streaming_format_tests(idx, chunk)
has_finish_reason = finished
if finished:
break
complete_response += chunk
if has_finish_reason is False:
raise Exception("Finish reason not in final chunk")
if complete_response.strip() == "":
raise Exception("Empty response received")
print(f"completion_response: {complete_response}")
except AuthenticationError as e:
pass
except Exception as e:
pytest.fail(f"Error occurred: {e}")
# test_completion_cohere_stream_bad_key()
# def test_completion_nlp_cloud():
# try:
# messages = [
# {"role": "system", "content": "You are a helpful assistant."},
# {
# "role": "user",
# "content": "how does a court case get to the Supreme Court?",
# },
# ]
# response = completion(model="dolphin", messages=messages, stream=True)
# complete_response = ""
# # Add any assertions here to check the response
# has_finish_reason = False
# for idx, chunk in enumerate(response):
# chunk, finished = streaming_format_tests(idx, chunk)
# has_finish_reason = finished
# complete_response += chunk
# if finished:
# break
# if has_finish_reason is False:
# raise Exception("Finish reason not in final chunk")
# if complete_response.strip() == "":
# raise Exception("Empty response received")
# print(f"completion_response: {complete_response}")
# except Exception as e:
# pytest.fail(f"Error occurred: {e}")
# test_completion_nlp_cloud()
# def test_completion_nlp_cloud_bad_key():
# try:
# api_key = "bad-key"
# messages = [
# {"role": "system", "content": "You are a helpful assistant."},
# {
# "role": "user",
# "content": "how does a court case get to the Supreme Court?",
# },
# ]
# response = completion(model="dolphin", messages=messages, stream=True, api_key=api_key)
# complete_response = ""
# # Add any assertions here to check the response
# has_finish_reason = False
# for idx, chunk in enumerate(response):
# chunk, finished = streaming_format_tests(idx, chunk)
# has_finish_reason = finished
# complete_response += chunk
# if finished:
# break
# if has_finish_reason is False:
# raise Exception("Finish reason not in final chunk")
# if complete_response.strip() == "":
# raise Exception("Empty response received")
# print(f"completion_response: {complete_response}")
# except Exception as e:
# pytest.fail(f"Error occurred: {e}")
# test_completion_nlp_cloud_bad_key()
# def test_completion_hf_stream():
# try:
# messages = [
@ -235,10 +335,41 @@ def test_completion_cohere_stream():
# if complete_response.strip() == "":
# raise Exception("Empty response received")
# print(f"completion_response: {complete_response}")
# except InvalidRequestError as e:
# pass
# except Exception as e:
# pytest.fail(f"Error occurred: {e}")
-# test_completion_hf_stream()
# # test_completion_hf_stream()
# def test_completion_hf_stream_bad_key():
# try:
# api_key = "bad-key"
# messages = [
# {
# "content": "Hello! How are you today?",
# "role": "user"
# },
# ]
# response = completion(
# model="huggingface/meta-llama/Llama-2-7b-chat-hf", messages=messages, api_base="https://a8l9e3ucxinyl3oj.us-east-1.aws.endpoints.huggingface.cloud", stream=True, max_tokens=1000, api_key=api_key
# )
# complete_response = ""
# # Add any assertions here to check the response
# for idx, chunk in enumerate(response):
# chunk, finished = streaming_format_tests(idx, chunk)
# if finished:
# break
# complete_response += chunk
# if complete_response.strip() == "":
# raise Exception("Empty response received")
# print(f"completion_response: {complete_response}")
# except InvalidRequestError as e:
# pass
# except Exception as e:
# pytest.fail(f"Error occurred: {e}")
# test_completion_hf_stream_bad_key()
def test_completion_claude_stream():
try:
@ -266,19 +397,22 @@ def test_completion_claude_stream():
pytest.fail(f"Error occurred: {e}")
# test_completion_claude_stream()
-def test_completion_bedrock_ai21_stream():
def test_completion_claude_stream_bad_key():
try:
-litellm.set_verbose = False
api_key = "bad-key"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{
"role": "user",
"content": "how does a court case get to the Supreme Court?",
},
]
response = completion(
-model="bedrock/amazon.titan-tg1-large",
-messages=[{"role": "user", "content": "Be as verbose as possible and give as many details as possible, how does a court case get to the Supreme Court?"}],
-temperature=1,
-max_tokens=4096,
-stream=True,
model="claude-instant-1", messages=messages, stream=True, max_tokens=50, api_key=api_key
)
complete_response = ""
# Add any assertions here to check the response
-print(response)
for idx, chunk in enumerate(response):
chunk, finished = streaming_format_tests(idx, chunk)
if finished:
@ -286,11 +420,263 @@ def test_completion_bedrock_ai21_stream():
complete_response += chunk
if complete_response.strip() == "":
raise Exception("Empty response received")
print(f"completion_response: {complete_response}")
except Exception as e:
pytest.fail(f"Error occurred: {e}")
-# test_completion_cohere_stream()
# test_completion_claude_stream_bad_key()
def test_completion_replicate_stream():
try:
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{
"role": "user",
"content": "how does a court case get to the Supreme Court?",
},
]
response = completion(
model="replicate/meta/llama-2-70b-chat:02e509c789964a7ea8736978a43525956ef40397be9033abf9fd2badfe68c9e3", messages=messages, stream=True, max_tokens=50
)
complete_response = ""
has_finish_reason = False
# Add any assertions here to check the response
for idx, chunk in enumerate(response):
chunk, finished = streaming_format_tests(idx, chunk)
has_finish_reason = finished
if finished:
break
complete_response += chunk
if has_finish_reason is False:
raise Exception("finish reason not set for last chunk")
if complete_response.strip() == "":
raise Exception("Empty response received")
print(f"completion_response: {complete_response}")
except InvalidRequestError as e:
pass
except Exception as e:
pytest.fail(f"Error occurred: {e}")
# test_completion_replicate_stream()
# def test_completion_vertexai_stream():
# try:
# import os
# os.environ["VERTEXAI_PROJECT"] = "pathrise-convert-1606954137718"
# os.environ["VERTEXAI_LOCATION"] = "us-central1"
# messages = [
# {"role": "system", "content": "You are a helpful assistant."},
# {
# "role": "user",
# "content": "how does a court case get to the Supreme Court?",
# },
# ]
# response = completion(
# model="vertex_ai/chat-bison", messages=messages, stream=True, max_tokens=50
# )
# complete_response = ""
# has_finish_reason = False
# # Add any assertions here to check the response
# for idx, chunk in enumerate(response):
# chunk, finished = streaming_format_tests(idx, chunk)
# has_finish_reason = finished
# if finished:
# break
# complete_response += chunk
# if has_finish_reason is False:
# raise Exception("finish reason not set for last chunk")
# if complete_response.strip() == "":
# raise Exception("Empty response received")
# print(f"completion_response: {complete_response}")
# except InvalidRequestError as e:
# pass
# except Exception as e:
# pytest.fail(f"Error occurred: {e}")
# test_completion_vertexai_stream()
# def test_completion_vertexai_stream_bad_key():
# try:
# import os
# messages = [
# {"role": "system", "content": "You are a helpful assistant."},
# {
# "role": "user",
# "content": "how does a court case get to the Supreme Court?",
# },
# ]
# response = completion(
# model="vertex_ai/chat-bison", messages=messages, stream=True, max_tokens=50
# )
# complete_response = ""
# has_finish_reason = False
# # Add any assertions here to check the response
# for idx, chunk in enumerate(response):
# chunk, finished = streaming_format_tests(idx, chunk)
# has_finish_reason = finished
# if finished:
# break
# complete_response += chunk
# if has_finish_reason is False:
# raise Exception("finish reason not set for last chunk")
# if complete_response.strip() == "":
# raise Exception("Empty response received")
# print(f"completion_response: {complete_response}")
# except InvalidRequestError as e:
# pass
# except Exception as e:
# pytest.fail(f"Error occurred: {e}")
# test_completion_vertexai_stream_bad_key()
def test_completion_replicate_stream():
try:
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{
"role": "user",
"content": "how does a court case get to the Supreme Court?",
},
]
response = completion(
model="replicate/meta/llama-2-70b-chat:02e509c789964a7ea8736978a43525956ef40397be9033abf9fd2badfe68c9e3", messages=messages, stream=True, max_tokens=50
)
complete_response = ""
has_finish_reason = False
# Add any assertions here to check the response
for idx, chunk in enumerate(response):
chunk, finished = streaming_format_tests(idx, chunk)
has_finish_reason = finished
if finished:
break
complete_response += chunk
if has_finish_reason is False:
raise Exception("finish reason not set for last chunk")
if complete_response.strip() == "":
raise Exception("Empty response received")
print(f"completion_response: {complete_response}")
except InvalidRequestError as e:
pass
except Exception as e:
pytest.fail(f"Error occurred: {e}")
def test_completion_replicate_stream_bad_key():
try:
api_key = "bad-key"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{
"role": "user",
"content": "how does a court case get to the Supreme Court?",
},
]
response = completion(
model="replicate/meta/llama-2-70b-chat:02e509c789964a7ea8736978a43525956ef40397be9033abf9fd2badfe68c9e3", messages=messages, stream=True, max_tokens=50, api_key=api_key
)
complete_response = ""
# Add any assertions here to check the response
for idx, chunk in enumerate(response):
chunk, finished = streaming_format_tests(idx, chunk)
if finished:
break
complete_response += chunk
if complete_response.strip() == "":
raise Exception("Empty response received")
print(f"completion_response: {complete_response}")
except InvalidRequestError as e:
pass
except Exception as e:
pytest.fail(f"Error occurred: {e}")
# test_completion_replicate_stream_bad_key()
def test_completion_bedrock_ai21_stream():
try:
response = completion(
model="bedrock/amazon.titan-tg1-large",
messages=[{"role": "user", "content": "Be as verbose as possible and give as many details as possible, how does a court case get to the Supreme Court?"}],
temperature=1,
max_tokens=4096,
stream=True,
)
complete_response = ""
has_finish_reason = False
# Add any assertions here to check the response
for idx, chunk in enumerate(response):
chunk, finished = streaming_format_tests(idx, chunk)
has_finish_reason = finished
complete_response += chunk
if finished:
break
if has_finish_reason is False:
raise Exception("finish reason not set for last chunk")
if complete_response.strip() == "":
raise Exception("Empty response received")
print(f"completion_response: {complete_response}")
except Exception as e:
pytest.fail(f"Error occurred: {e}")
# test_completion_bedrock_ai21_stream()
def test_completion_bedrock_ai21_stream_bad_key():
try:
response = completion(
model="bedrock/amazon.titan-tg1-large",
messages=[{"role": "user", "content": "Be as verbose as possible and give as many details as possible, how does a court case get to the Supreme Court?"}],
temperature=1,
max_tokens=4096,
stream=True,
)
complete_response = ""
has_finish_reason = False
# Add any assertions here to check the response
for idx, chunk in enumerate(response):
chunk, finished = streaming_format_tests(idx, chunk)
has_finish_reason = finished
if finished:
break
complete_response += chunk
if has_finish_reason is False:
raise Exception("finish reason not set for last chunk")
if complete_response.strip() == "":
raise Exception("Empty response received")
print(f"completion_response: {complete_response}")
except InvalidRequestError as e:
pass
except Exception as e:
pytest.fail(f"Error occurred: {e}")
# test_completion_bedrock_ai21_stream_bad_key()
def test_completion_sagemaker_stream():
try:
response = completion(
model="sagemaker/jumpstart-dft-meta-textgeneration-llama-2-7b",
messages=messages,
temperature=0.2,
max_tokens=80,
stream=True,
)
complete_response = ""
has_finish_reason = False
# Add any assertions here to check the response
for idx, chunk in enumerate(response):
chunk, finished = streaming_format_tests(idx, chunk)
has_finish_reason = finished
if finished:
break
complete_response += chunk
if has_finish_reason is False:
raise Exception("finish reason not set for last chunk")
if complete_response.strip() == "":
raise Exception("Empty response received")
except InvalidRequestError as e:
pass
except Exception as e:
pytest.fail(f"Error occurred: {e}")
test_completion_sagemaker_stream()
# test on openai completion call
def test_openai_text_completion_call():
@ -314,7 +700,33 @@ def test_openai_text_completion_call():
def ai21_completion_call():
try:
response = completion(
-model="j2-ultra", messages=messages, stream=True, logger_fn=logger_fn
model="j2-ultra", messages=messages, stream=True
)
print(f"response: {response}")
has_finished = False
complete_response = ""
start_time = time.time()
for idx, chunk in enumerate(response):
chunk, finished = streaming_format_tests(idx, chunk)
has_finished = finished
complete_response += chunk
if finished:
break
if has_finished is False:
raise Exception("finished reason missing from final chunk")
if complete_response.strip() == "":
raise Exception("Empty response received")
print(f"completion_response: {complete_response}")
except:
pytest.fail(f"error occurred: {traceback.format_exc()}")
# ai21_completion_call()
def ai21_completion_call_bad_key():
try:
api_key = "bad-key"
response = completion(
model="j2-ultra", messages=messages, stream=True, api_key=api_key
)
print(f"response: {response}")
complete_response = ""
@ -327,10 +739,64 @@ def ai21_completion_call():
if complete_response.strip() == "":
raise Exception("Empty response received")
print(f"completion_response: {complete_response}")
except InvalidRequestError as e:
pass
except:
pytest.fail(f"error occurred: {traceback.format_exc()}")
-# ai21_completion_call()
# ai21_completion_call_bad_key()
def test_completion_aleph_alpha():
try:
response = completion(
model="luminous-base", messages=messages, stream=True
)
# Add any assertions here to check the response
has_finished = False
complete_response = ""
start_time = time.time()
for idx, chunk in enumerate(response):
chunk, finished = streaming_format_tests(idx, chunk)
has_finished = finished
complete_response += chunk
if finished:
break
if has_finished is False:
raise Exception("finished reason missing from final chunk")
if complete_response.strip() == "":
raise Exception("Empty response received")
except Exception as e:
pytest.fail(f"Error occurred: {e}")
# test_completion_aleph_alpha()
# def test_completion_aleph_alpha_bad_key():
# try:
# api_key = "bad-key"
# response = completion(
# model="luminous-base", messages=messages, stream=True, api_key=api_key
# )
# # Add any assertions here to check the response
# has_finished = False
# complete_response = ""
# start_time = time.time()
# for idx, chunk in enumerate(response):
# chunk, finished = streaming_format_tests(idx, chunk)
# has_finished = finished
# complete_response += chunk
# if finished:
# break
# if has_finished is False:
# raise Exception("finished reason missing from final chunk")
# if complete_response.strip() == "":
# raise Exception("Empty response received")
# except InvalidRequestError as e:
# pass
# except Exception as e:
# pytest.fail(f"Error occurred: {e}")
# test_completion_aleph_alpha_bad_key()
# test on openai completion call
def test_openai_chat_completion_call():
try:
@ -366,11 +832,15 @@ def test_together_ai_completion_call_starcoder():
)
complete_response = ""
print(f"returned response object: {response}")
has_finish_reason = False
for idx, chunk in enumerate(response):
chunk, finished = streaming_format_tests(idx, chunk)
has_finish_reason = finished
if finished:
break
complete_response += chunk
if has_finish_reason is False:
raise Exception("Finish reason not set for last chunk")
if complete_response == "":
raise Exception("Empty response received")
print(f"complete response: {complete_response}")
@ -378,6 +848,38 @@ def test_together_ai_completion_call_starcoder():
print(f"error occurred: {traceback.format_exc()}") print(f"error occurred: {traceback.format_exc()}")
pass pass
# test_together_ai_completion_call_starcoder()
def test_together_ai_completion_call_starcoder_bad_key():
try:
api_key = "bad-key"
start_time = time.time()
response = completion(
model="together_ai/bigcode/starcoder",
messages=messages,
stream=True,
api_key=api_key
)
complete_response = ""
has_finish_reason = False
for idx, chunk in enumerate(response):
chunk, finished = streaming_format_tests(idx, chunk)
has_finish_reason = finished
if finished:
break
complete_response += chunk
if has_finish_reason is False:
raise Exception("Finish reason not set for last chunk")
if complete_response == "":
raise Exception("Empty response received")
print(f"complete response: {complete_response}")
except InvalidRequestError as e:
pass
except:
print(f"error occurred: {traceback.format_exc()}")
pass
# test_together_ai_completion_call_starcoder_bad_key()
#### Test Function calling + streaming ####
def test_completion_openai_with_functions():
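The bad-key tests above all follow the same shape: open a stream with an invalid key, drain it, and treat AuthenticationError or InvalidRequestError as the expected outcome. An equivalent way to state that expectation with pytest.raises, shown only as an illustrative alternative to the try/except-pass pattern and assuming litellm and network access are available:

    import pytest
    from litellm import completion, AuthenticationError

    def test_cohere_stream_bad_key_raises():
        with pytest.raises(AuthenticationError):
            response = completion(
                model="command-nightly",
                messages=[{"role": "user", "content": "hi"}],
                stream=True,
                api_key="bad-key",
            )
            for _ in response:  # the error may only surface while iterating
                pass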

View file

@ -2,6 +2,7 @@ import sys
import dotenv, json, traceback, threading
import subprocess, os
import litellm, openai
import itertools
import random, uuid, requests
import datetime, time
import tiktoken
@ -1915,7 +1916,6 @@ def exception_type(
):
global user_logger_fn, liteDebuggerClient
exception_mapping_worked = False
if litellm.set_verbose == True:
litellm.error_logs['EXCEPTION'] = original_exception
litellm.error_logs['KWARGS'] = completion_kwargs
@ -1970,7 +1970,7 @@ def exception_type(
exception_type = type(original_exception).__name__
else:
exception_type = ""
-if "claude" in model: # one of the anthropics
if custom_llm_provider == "anthropic": # one of the anthropics
if hasattr(original_exception, "message"):
if "prompt is too long" in original_exception.message:
exception_mapping_worked = True
@ -1979,6 +1979,13 @@ def exception_type(
model=model,
llm_provider="anthropic"
)
if "Invalid API Key" in original_exception.message:
exception_mapping_worked = True
raise AuthenticationError(
message=original_exception.message,
model=model,
llm_provider="anthropic"
)
if hasattr(original_exception, "status_code"):
print_verbose(f"status_code: {original_exception.status_code}")
if original_exception.status_code == 401:
@ -2031,7 +2038,7 @@ def exception_type(
llm_provider="anthropic", llm_provider="anthropic",
model=model model=model
) )
elif "replicate" in model: elif custom_llm_provider == "replicate":
if "Incorrect authentication token" in error_str: if "Incorrect authentication token" in error_str:
exception_mapping_worked = True exception_mapping_worked = True
raise AuthenticationError( raise AuthenticationError(
@ -2068,7 +2075,7 @@ def exception_type(
llm_provider="replicate", llm_provider="replicate",
model=model model=model
) )
elif original_exception.status_code == 400: elif original_exception.status_code == 400 or original_exception.status_code == 422:
exception_mapping_worked = True exception_mapping_worked = True
raise InvalidRequestError( raise InvalidRequestError(
message=f"ReplicateException - {original_exception.message}", message=f"ReplicateException - {original_exception.message}",
@ -2110,7 +2117,31 @@ def exception_type(
llm_provider="replicate", llm_provider="replicate",
model=model model=model
) )
elif model in litellm.cohere_models or custom_llm_provider == "cohere": # Cohere elif custom_llm_provider == "bedrock":
if "Unable to locate credentials" in error_str:
exception_mapping_worked = True
raise InvalidRequestError(
message=f"BedrockException - {error_str}",
model=model,
llm_provider="bedrock"
)
elif custom_llm_provider == "sagemaker":
if "Unable to locate credentials" in error_str:
exception_mapping_worked = True
raise InvalidRequestError(
message=f"SagemakerException - {error_str}",
model=model,
llm_provider="sagemaker"
)
elif custom_llm_provider == "vertex_ai":
if "Vertex AI API has not been used in project" in error_str or "Unable to find your project" in error_str:
exception_mapping_worked = True
raise InvalidRequestError(
message=f"VertexAIException - {error_str}",
model=model,
llm_provider="vertex_ai"
)
elif custom_llm_provider == "cohere": # Cohere
if (
"invalid api token" in error_str
or "No API key provided." in error_str
@ -2184,6 +2215,13 @@ def exception_type(
model=model,
llm_provider="huggingface"
)
elif "A valid user token is required" in error_str:
exception_mapping_worked = True
raise InvalidRequestError(
message=error_str,
llm_provider="huggingface",
model=model
)
if hasattr(original_exception, "status_code"):
if original_exception.status_code == 401:
exception_mapping_worked = True
@ -2221,6 +2259,8 @@ def exception_type(
llm_provider="huggingface", llm_provider="huggingface",
model=model model=model
) )
exception_mapping_worked = True
raise APIError(status_code=500, message=error_str, model=model, llm_provider=custom_llm_provider)
elif custom_llm_provider == "ai21": elif custom_llm_provider == "ai21":
if hasattr(original_exception, "message"): if hasattr(original_exception, "message"):
if "Prompt has too many tokens" in original_exception.message: if "Prompt has too many tokens" in original_exception.message:
@ -2230,6 +2270,13 @@ def exception_type(
model=model,
llm_provider="ai21"
)
if "Bad or missing API token." in original_exception.message:
exception_mapping_worked = True
raise InvalidRequestError(
message=f"AI21Exception - {original_exception.message}",
model=model,
llm_provider="ai21"
)
if hasattr(original_exception, "status_code"):
if original_exception.status_code == 401:
exception_mapping_worked = True
@ -2266,7 +2313,7 @@ def exception_type(
llm_provider="ai21", llm_provider="ai21",
model=model model=model
) )
elif model in litellm.nlp_cloud_models or custom_llm_provider == "nlp_cloud": elif custom_llm_provider == "nlp_cloud":
if "detail" in error_str: if "detail" in error_str:
if "Input text length should not exceed" in error_str: if "Input text length should not exceed" in error_str:
exception_mapping_worked = True exception_mapping_worked = True
@ -2342,6 +2389,7 @@ def exception_type(
model=model
)
elif custom_llm_provider == "together_ai":
import json
error_response = json.loads(error_str)
if "error" in error_response and "`inputs` tokens + `max_new_tokens` must be <=" in error_response["error"]:
exception_mapping_worked = True
@ -2364,6 +2412,13 @@ def exception_type(
model=model,
llm_provider="together_ai"
)
elif "error" in error_response and "API key doesn't match expected format." in error_response["error"]:
exception_mapping_worked = True
raise InvalidRequestError(
message=f"TogetherAIException - {error_response['error']}",
model=model,
llm_provider="together_ai"
)
elif "error_type" in error_response and error_response["error_type"] == "validation": elif "error_type" in error_response and error_response["error_type"] == "validation":
exception_mapping_worked = True exception_mapping_worked = True
raise InvalidRequestError( raise InvalidRequestError(
@ -2393,7 +2448,7 @@ def exception_type(
llm_provider="together_ai", llm_provider="together_ai",
model=model model=model
) )
elif model in litellm.aleph_alpha_models: elif custom_llm_provider == "aleph_alpha":
if "This is longer than the model's maximum context length" in error_str: if "This is longer than the model's maximum context length" in error_str:
exception_mapping_worked = True exception_mapping_worked = True
raise ContextWindowExceededError( raise ContextWindowExceededError(
@ -2401,6 +2456,13 @@ def exception_type(
llm_provider="aleph_alpha", llm_provider="aleph_alpha",
model=model model=model
) )
elif "InvalidToken" in error_str or "No token provided" in error_str:
exception_mapping_worked = True
raise InvalidRequestError(
message=f"AlephAlphaException - {original_exception.message}",
llm_provider="aleph_alpha",
model=model
)
elif hasattr(original_exception, "status_code"):
print(f"status code: {original_exception.status_code}")
if original_exception.status_code == 401:
@ -2445,7 +2507,8 @@ def exception_type(
elif custom_llm_provider == "ollama": elif custom_llm_provider == "ollama":
if "no attribute 'async_get_ollama_response_stream" in error_str: if "no attribute 'async_get_ollama_response_stream" in error_str:
raise ImportError("Import error - trying to use async for ollama. import async_generator failed. Try 'pip install async_generator'") raise ImportError("Import error - trying to use async for ollama. import async_generator failed. Try 'pip install async_generator'")
raise original_exception exception_mapping_worked = True
raise APIError(status_code=500, message=str(original_exception), llm_provider=custom_llm_provider, model=model)
except Exception as e:
# LOGGING
exception_logging(
@ -2563,6 +2626,7 @@ class CustomStreamWrapper:
self.logging_obj = logging_obj
self.completion_stream = completion_stream
self.sent_first_chunk = False
self.sent_last_chunk = False
if self.logging_obj:
# Log the type of the received item
self.logging_obj.post_call(str(type(completion_stream)))
@ -2579,41 +2643,71 @@ class CustomStreamWrapper:
def handle_anthropic_chunk(self, chunk):
str_line = chunk.decode("utf-8") # Convert bytes to string
print(f"str_line: {str_line}")
text = ""
is_finished = False
finish_reason = None
if str_line.startswith("data:"):
data_json = json.loads(str_line[5:])
-return data_json.get("completion", "")
-return ""
text = data_json.get("completion", "")
if data_json.get("stop_reason", None):
is_finished = True
finish_reason = data_json["stop_reason"]
return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}
elif "error" in str_line:
raise ValueError(f"Unable to parse response. Original response: {str_line}")
else:
return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}
def handle_together_ai_chunk(self, chunk):
chunk = chunk.decode("utf-8")
-text_index = chunk.find('"text":"') # this checks if text: exists
-text_start = text_index + len('"text":"')
-text_end = chunk.find('"}', text_start)
-if text_index != -1 and text_end != -1:
-extracted_text = chunk[text_start:text_end]
-return extracted_text
text = ""
is_finished = False
finish_reason = None
if "text" in chunk:
text_index = chunk.find('"text":"') # this checks if text: exists
text_start = text_index + len('"text":"')
text_end = chunk.find('"}', text_start)
if text_index != -1 and text_end != -1:
extracted_text = chunk[text_start:text_end]
text = extracted_text
return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}
elif "[DONE]" in chunk:
return {"text": text, "is_finished": True, "finish_reason": "stop"}
elif "error" in chunk:
raise ValueError(chunk)
else:
-return ""
return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}
def handle_huggingface_chunk(self, chunk):
chunk = chunk.decode("utf-8")
text = ""
is_finished = False
finish_reason = ""
if chunk.startswith("data:"):
data_json = json.loads(chunk[5:])
print(f"data json: {data_json}")
if "token" in data_json and "text" in data_json["token"]:
text = data_json["token"]["text"]
if "meta-llama/Llama-2" in self.model: #clean eos tokens like </s> from the returned output text
if any(token in text for token in llama_2_special_tokens):
text = text.replace("<s>", "").replace("</s>", "")
-return text
-else:
-return ""
-return ""
if data_json.get("details", False) and data_json["details"].get("finish_reason", False):
is_finished = True
finish_reason = data_json["details"]["finish_reason"]
return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}
elif "error" in chunk:
raise ValueError(chunk)
return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}
-def handle_ai21_chunk(self, chunk):
def handle_ai21_chunk(self, chunk): # fake streaming
chunk = chunk.decode("utf-8")
data_json = json.loads(chunk)
try:
-return data_json["completions"][0]["data"]["text"]
text = data_json["completions"][0]["data"]["text"]
is_finished = True
finish_reason = "stop"
return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}
except:
raise ValueError(f"Unable to parse response. Original response: {chunk}")
@ -2621,8 +2715,10 @@ class CustomStreamWrapper:
chunk = chunk.decode("utf-8") chunk = chunk.decode("utf-8")
data_json = json.loads(chunk) data_json = json.loads(chunk)
try: try:
print(f"data json: {data_json}") text = data_json["generated_text"]
return data_json["generated_text"] is_finished = True
finish_reason = "stop"
return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}
except: except:
raise ValueError(f"Unable to parse response. Original response: {chunk}") raise ValueError(f"Unable to parse response. Original response: {chunk}")
@ -2630,7 +2726,10 @@ class CustomStreamWrapper:
chunk = chunk.decode("utf-8") chunk = chunk.decode("utf-8")
data_json = json.loads(chunk) data_json = json.loads(chunk)
try: try:
return data_json["completions"][0]["completion"] text = data_json["completions"][0]["completion"]
is_finished = True
finish_reason = "stop"
return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}
except: except:
raise ValueError(f"Unable to parse response. Original response: {chunk}") raise ValueError(f"Unable to parse response. Original response: {chunk}")
@ -2638,7 +2737,35 @@ class CustomStreamWrapper:
chunk = chunk.decode("utf-8") chunk = chunk.decode("utf-8")
data_json = json.loads(chunk) data_json = json.loads(chunk)
try: try:
return data_json["text"] text = ""
is_finished = False
finish_reason = ""
if "text" in data_json:
text = data_json["text"]
elif "is_finished" in data_json:
is_finished = data_json["is_finished"]
finish_reason = data_json["finish_reason"]
else:
raise Exception(data_json)
return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}
except:
raise ValueError(f"Unable to parse response. Original response: {chunk}")
def handle_replicate_chunk(self, chunk):
print(f"chunk: {chunk}")
try:
text = ""
is_finished = False
finish_reason = ""
if "output" in chunk:
text = chunk['output']
if "status" in chunk:
if chunk["status"] == "succeeded":
is_finished = True
finish_reason = "stop"
elif chunk.get("error", None):
raise Exception(chunk["error"])
return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}
except:
raise ValueError(f"Unable to parse response. Original response: {chunk}")
@ -2683,13 +2810,21 @@ class CustomStreamWrapper:
traceback.print_exc()
return ""
-def handle_bedrock_stream(self):
-if self.completion_stream:
-event = next(self.completion_stream)
-chunk = event.get('chunk')
-if chunk:
-chunk_data = json.loads(chunk.get('bytes').decode())
-return chunk_data['outputText']
def handle_bedrock_stream(self, chunk):
chunk = chunk.get('chunk')
if chunk:
chunk_data = json.loads(chunk.get('bytes').decode())
text = ""
is_finished = False
finish_reason = ""
if "outputText" in chunk_data:
text = chunk_data['outputText']
if chunk_data.get("completionReason", None):
is_finished = True
finish_reason = chunk_data["completionReason"]
elif chunk.get("error", None):
raise Exception(chunk["error"])
return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}
return ""
## needs to handle the empty string case (even starting chunk can be an empty string)
@ -2701,49 +2836,94 @@ class CustomStreamWrapper:
completion_obj = {"content": ""} completion_obj = {"content": ""}
if self.custom_llm_provider and self.custom_llm_provider == "anthropic": if self.custom_llm_provider and self.custom_llm_provider == "anthropic":
chunk = next(self.completion_stream) chunk = next(self.completion_stream)
completion_obj["content"] = self.handle_anthropic_chunk(chunk) response_obj = self.handle_anthropic_chunk(chunk)
completion_obj["content"] = response_obj["text"]
if response_obj["is_finished"]:
model_response.choices[0].finish_reason = response_obj["finish_reason"]
elif self.model == "replicate" or self.custom_llm_provider == "replicate": elif self.model == "replicate" or self.custom_llm_provider == "replicate":
chunk = next(self.completion_stream) chunk = next(self.completion_stream)
completion_obj["content"] = chunk response_obj = self.handle_replicate_chunk(chunk)
completion_obj["content"] = response_obj["text"]
if response_obj["is_finished"]:
model_response.choices[0].finish_reason = response_obj["finish_reason"]
elif (
self.custom_llm_provider and self.custom_llm_provider == "together_ai"):
chunk = next(self.completion_stream)
-text_data = self.handle_together_ai_chunk(chunk)
-if text_data == "":
-return self.__next__()
-completion_obj["content"] = text_data
response_obj = self.handle_together_ai_chunk(chunk)
completion_obj["content"] = response_obj["text"]
if response_obj["is_finished"]:
model_response.choices[0].finish_reason = response_obj["finish_reason"]
elif self.custom_llm_provider and self.custom_llm_provider == "huggingface":
chunk = next(self.completion_stream)
-completion_obj["content"] = self.handle_huggingface_chunk(chunk)
response_obj = self.handle_huggingface_chunk(chunk)
completion_obj["content"] = response_obj["text"]
if response_obj["is_finished"]:
model_response.choices[0].finish_reason = response_obj["finish_reason"]
elif self.custom_llm_provider and self.custom_llm_provider == "baseten": # baseten doesn't provide streaming elif self.custom_llm_provider and self.custom_llm_provider == "baseten": # baseten doesn't provide streaming
chunk = next(self.completion_stream) chunk = next(self.completion_stream)
completion_obj["content"] = self.handle_baseten_chunk(chunk) completion_obj["content"] = self.handle_baseten_chunk(chunk)
elif self.custom_llm_provider and self.custom_llm_provider == "ai21": #ai21 doesn't provide streaming elif self.custom_llm_provider and self.custom_llm_provider == "ai21": #ai21 doesn't provide streaming
chunk = next(self.completion_stream) chunk = next(self.completion_stream)
completion_obj["content"] = self.handle_ai21_chunk(chunk) response_obj = self.handle_ai21_chunk(chunk)
completion_obj["content"] = response_obj["text"]
if response_obj["is_finished"]:
model_response.choices[0].finish_reason = response_obj["finish_reason"]
elif self.custom_llm_provider and self.custom_llm_provider == "vllm": elif self.custom_llm_provider and self.custom_llm_provider == "vllm":
chunk = next(self.completion_stream) chunk = next(self.completion_stream)
completion_obj["content"] = chunk[0].outputs[0].text completion_obj["content"] = chunk[0].outputs[0].text
elif self.custom_llm_provider and self.custom_llm_provider == "aleph-alpha": #aleph alpha doesn't provide streaming elif self.custom_llm_provider and self.custom_llm_provider == "aleph_alpha": #aleph alpha doesn't provide streaming
chunk = next(self.completion_stream) chunk = next(self.completion_stream)
completion_obj["content"] = self.handle_aleph_alpha_chunk(chunk) response_obj = self.handle_aleph_alpha_chunk(chunk)
completion_obj["content"] = response_obj["text"]
if response_obj["is_finished"]:
model_response.choices[0].finish_reason = response_obj["finish_reason"]
elif self.custom_llm_provider and self.custom_llm_provider == "text-completion-openai": elif self.custom_llm_provider and self.custom_llm_provider == "text-completion-openai":
chunk = next(self.completion_stream) chunk = next(self.completion_stream)
completion_obj["content"] = self.handle_openai_text_completion_chunk(chunk) completion_obj["content"] = self.handle_openai_text_completion_chunk(chunk)
elif self.model in litellm.nlp_cloud_models or self.custom_llm_provider == "nlp_cloud": elif self.model in litellm.nlp_cloud_models or self.custom_llm_provider == "nlp_cloud":
chunk = next(self.completion_stream) try:
completion_obj["content"] = self.handle_nlp_cloud_chunk(chunk) chunk = next(self.completion_stream)
elif self.model in (litellm.vertex_chat_models + litellm.vertex_code_chat_models + litellm.vertex_text_models + litellm.vertex_code_text_models): response_obj = self.handle_nlp_cloud_chunk(chunk)
chunk = next(self.completion_stream) completion_obj["content"] = response_obj["text"]
completion_obj["content"] = str(chunk) if response_obj["is_finished"]:
model_response.choices[0].finish_reason = response_obj["finish_reason"]
except Exception as e:
if self.sent_last_chunk:
raise e
else:
if self.sent_first_chunk is False:
raise Exception("An unknown error occurred with the stream")
model_response.choices[0].finish_reason = "stop"
self.sent_last_chunk = True
elif self.custom_llm_provider and self.custom_llm_provider == "vertex_ai":
try:
chunk = next(self.completion_stream)
completion_obj["content"] = str(chunk)
except StopIteration as e:
if self.sent_last_chunk:
raise e
else:
model_response.choices[0].finish_reason = "stop"
self.sent_last_chunk = True
elif self.custom_llm_provider == "cohere": elif self.custom_llm_provider == "cohere":
chunk = next(self.completion_stream) chunk = next(self.completion_stream)
completion_obj["content"] = self.handle_cohere_chunk(chunk) response_obj = self.handle_cohere_chunk(chunk)
completion_obj["content"] = response_obj["text"]
if response_obj["is_finished"]:
model_response.choices[0].finish_reason = response_obj["finish_reason"]
elif self.custom_llm_provider == "bedrock": elif self.custom_llm_provider == "bedrock":
completion_obj["content"] = self.handle_bedrock_stream() chunk = next(self.completion_stream)
response_obj = self.handle_bedrock_stream(chunk)
completion_obj["content"] = response_obj["text"]
if response_obj["is_finished"]:
model_response.choices[0].finish_reason = response_obj["finish_reason"]
elif self.custom_llm_provider == "sagemaker": elif self.custom_llm_provider == "sagemaker":
if len(self.completion_stream)==0: if len(self.completion_stream)==0:
raise StopIteration if self.sent_last_chunk:
raise StopIteration
else:
model_response.choices[0].finish_reason = "stop"
self.sent_last_chunk = True
chunk_size = 30
new_chunk = self.completion_stream[:chunk_size]
completion_obj["content"] = new_chunk
@ -2765,11 +2945,13 @@ class CustomStreamWrapper:
self.sent_first_chunk = True
model_response.choices[0].delta = Delta(**completion_obj)
return model_response
elif model_response.choices[0].finish_reason:
return model_response
except StopIteration:
raise StopIteration
except Exception as e:
-model_response.choices[0].finish_reason = "stop"
-return model_response
e.message = str(e)
return exception_type(model=self.model, custom_llm_provider=self.custom_llm_provider, original_exception=e)
async def __anext__(self):
try:
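Because __next__ now routes any exception through exception_type instead of quietly ending the stream with finish_reason = "stop", provider failures reach the caller as typed litellm exceptions even mid-stream. A simplified, self-contained stand-in for that pattern (not litellm's actual implementation):

    class AuthenticationError(Exception):
        pass

    def map_stream_exceptions(stream):
        # Convert errors raised while iterating a stream into a typed exception.
        try:
            for chunk in stream:
                yield chunk
        except Exception as e:
            if "Invalid API Key" in str(e):  # provider-specific detail
                raise AuthenticationError(str(e))
            raise

    def fake_provider_stream():
        yield "partial text"
        raise RuntimeError("Invalid API Key")

    try:
        for piece in map_stream_exceptions(fake_provider_stream()):
            print(piece)
    except AuthenticationError as err:
        print(f"mapped: {err}")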
@ -2796,7 +2978,6 @@ def read_config_args(config_path) -> dict:
# read keys/ values from config file and return them
return config
except Exception as e:
-print("An error occurred while reading config:", str(e))
raise e
########## experimental completion variants ############################
@ -2899,7 +3080,6 @@ def get_model_split_test(models, completion_call_id):
try:
# make the api call
last_fetched_at = time.time()
-print(f"last_fetched_at: {last_fetched_at}")
response = requests.post(
#http://api.litellm.ai
url="http://api.litellm.ai/get_model_split_test", # get the updated dict from table or update the table with the dict

View file

@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
-version = "0.1.738"
version = "0.1.739"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT License"