bump: version 0.8.4 → 0.8.5
Parent: 80c60e71c1
Commit: 7358d2e4ea
11 changed files with 228 additions and 7343 deletions (2 binary files not shown)
@@ -6,7 +6,7 @@ import requests
 import time
 import litellm
 from typing import Callable
-from litellm.utils import ModelResponse, Choices, Message
+from litellm.utils import ModelResponse, Choices, Message, CustomStreamWrapper
 from typing import Optional
 from .prompt_templates.factory import prompt_factory, custom_prompt
@@ -65,12 +65,17 @@ class HuggingfaceConfig():
                 and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType, classmethod, staticmethod))
                 and v is not None}
 
-def validate_environment(api_key):
-    headers = {
+def validate_environment(api_key, headers):
+    default_headers = {
         "content-type": "application/json",
     }
-    if api_key:
-        headers["Authorization"] = f"Bearer {api_key}"
+    if api_key and headers is None:
+        default_headers["Authorization"] = f"Bearer {api_key}" # Huggingface Inference Endpoint default is to accept bearer tokens
+        headers = default_headers
+    elif headers:
+        headers=headers
+    else:
+        headers = default_headers
     return headers
 
 tgi_models_cache = None
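For context, a minimal sketch (not part of the diff; the token value is a placeholder) of how the reworked helper resolves headers: caller-supplied headers are passed through untouched, otherwise a bearer token is built from the API key.

# Illustrative calls against the validate_environment shown above.
print(validate_environment(api_key="hf_xxx", headers=None))
# -> {"content-type": "application/json", "Authorization": "Bearer hf_xxx"}

print(validate_environment(api_key="hf_xxx", headers={"X-Custom": "1"}))
# -> {"X-Custom": "1"}  (explicit headers win; no Authorization header is added)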
@@ -125,6 +130,7 @@ def completion(
     model: str,
     messages: list,
     api_base: Optional[str],
+    headers: Optional[dict],
     model_response: ModelResponse,
     print_verbose: Callable,
     encoding,
@@ -135,7 +141,8 @@ def completion(
     litellm_params=None,
     logger_fn=None,
 ):
-    headers = validate_environment(api_key)
+    print(f'headers inside hf rest api: {headers}')
+    headers = validate_environment(api_key, headers)
     task = get_hf_task_for_model(model)
     print_verbose(f"{model}, {task}")
     completion_url = ""
@@ -227,7 +234,7 @@ def completion(
         logging_obj.pre_call(
             input=input_text,
             api_key=api_key,
-            additional_args={"complete_input_dict": data, "task": task},
+            additional_args={"complete_input_dict": data, "task": task, "headers": headers},
         )
         ## COMPLETION CALL
         if "stream" in optional_params and optional_params["stream"] == True:
@@ -244,20 +251,43 @@ def completion(
                 headers=headers,
                 data=json.dumps(data)
             )
-            ## LOGGING
-            logging_obj.post_call(
-                input=input_text,
-                api_key=api_key,
-                original_response=response.text,
-                additional_args={"complete_input_dict": data, "task": task},
-            )
-        ## RESPONSE OBJECT
-        try:
-            completion_response = response.json()
-        except:
-            raise HuggingfaceError(
-                message=response.text, status_code=response.status_code
-            )
+
+            ## Some servers might return streaming responses even though stream was not set to true. (e.g. Baseten)
+            is_streamed = False
+            print(f"response keys: {response.__dict__.keys()}")
+            print(f"response keys: {response.__dict__['headers']}")
+            if response.__dict__['headers']["Content-Type"] == "text/event-stream":
+                is_streamed = True
+
+            # iterate over the complete streamed response, and return the final answer
+            if is_streamed:
+                streamed_response = CustomStreamWrapper(completion_stream=response.iter_lines(), model=model, custom_llm_provider="huggingface", logging_obj=logging_obj)
+                content = ""
+                for chunk in streamed_response:
+                    content += chunk["choices"][0]["delta"]["content"]
+                completion_response = [{"generated_text": content}]
+                ## LOGGING
+                logging_obj.post_call(
+                    input=input_text,
+                    api_key=api_key,
+                    original_response=completion_response,
+                    additional_args={"complete_input_dict": data, "task": task},
+                )
+            else:
+                ## LOGGING
+                logging_obj.post_call(
+                    input=input_text,
+                    api_key=api_key,
+                    original_response=response.text,
+                    additional_args={"complete_input_dict": data, "task": task},
+                )
+                ## RESPONSE OBJECT
+                try:
+                    completion_response = response.json()
+                except:
+                    raise HuggingfaceError(
+                        message=response.text, status_code=response.status_code
+                    )
         print_verbose(f"response: {completion_response}")
         if isinstance(completion_response, dict) and "error" in completion_response:
             print_verbose(f"completion error: {completion_response['error']}")
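As a standalone illustration of the fallback added above (the endpoint URL and payload are placeholders, not from the diff): detect an event-stream response and stitch the SSE chunks back into one completion, without the litellm wrappers.

import json
import requests

def read_maybe_streamed(url, payload, headers):
    # Some servers stream even when "stream" was not requested (e.g. Baseten).
    response = requests.post(url, headers=headers, data=json.dumps(payload))
    if response.headers.get("Content-Type", "") == "text/event-stream":
        content = ""
        for line in response.iter_lines():
            line = line.decode("utf-8")
            if line.startswith("data:"):
                data_json = json.loads(line[5:])
                content += data_json.get("token", {}).get("text", "")
        return content
    # non-streamed text-generation responses come back as [{"generated_text": ...}]
    return response.json()[0]["generated_text"]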
@@ -22,7 +22,9 @@ def llama_2_chat_pt(messages):
                 "post_message": "\n" # follows this - https://replicate.com/blog/how-to-prompt-llama
             }
         },
-        messages=messages
+        messages=messages,
+        bos_token="<s>",
+        eos_token="</s>"
     )
     return prompt
@@ -218,14 +220,26 @@ def function_call_prompt(messages: list, functions: list):
 
 
 # Custom prompt template
-def custom_prompt(role_dict: dict, messages: list, initial_prompt_value: str="", final_prompt_value: str=""):
-    prompt = initial_prompt_value
+def custom_prompt(role_dict: dict, messages: list, initial_prompt_value: str="", final_prompt_value: str="", bos_token: str="", eos_token: str=""):
+    prompt = bos_token + initial_prompt_value
+    bos_open = True
+    ## a bos token is at the start of a system / human message
+    ## an eos token is at the end of the assistant response to the message
     for message in messages:
         role = message["role"]
+
+        if role in ["system", "human"] and not bos_open:
+            prompt += bos_token
+            bos_open = True
+
         pre_message_str = role_dict[role]["pre_message"] if role in role_dict and "pre_message" in role_dict[role] else ""
         post_message_str = role_dict[role]["post_message"] if role in role_dict and "post_message" in role_dict[role] else ""
         prompt += pre_message_str + message["content"] + post_message_str
+
+        if role == "assistant":
+            prompt += eos_token
+            bos_open = False
 
     prompt += final_prompt_value
     return prompt
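A hedged usage sketch of the extended custom_prompt (the role_dict and messages below are made up for illustration). Note the reopen check looks for a "human" role, so OpenAI-style "user" turns do not get a fresh bos token after an assistant eos.

role_dict = {
    "system": {"pre_message": "[INST] <<SYS>>\n", "post_message": "\n<</SYS>>\n [/INST]\n"},
    "user": {"pre_message": "[INST] ", "post_message": " [/INST]\n"},
    "assistant": {"post_message": "\n"},
}
messages = [
    {"role": "user", "content": "Hi"},
    {"role": "assistant", "content": "Hello!"},
    {"role": "user", "content": "How are you?"},
]
prompt = custom_prompt(role_dict, messages, bos_token="<s>", eos_token="</s>")
# -> "<s>[INST] Hi [/INST]\nHello!\n</s>[INST] How are you? [/INST]\n"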
@@ -230,9 +230,10 @@ def completion(
     id = kwargs.get('id', None)
     metadata = kwargs.get('metadata', None)
     fallbacks = kwargs.get('fallbacks', None)
+    headers = kwargs.get("headers", None)
     ######## end of unpacking kwargs ###########
     openai_params = ["functions", "function_call", "temperature", "temperature", "top_p", "n", "stream", "stop", "max_tokens", "presence_penalty", "frequency_penalty", "logit_bias", "user", "request_timeout", "api_base", "api_version", "api_key"]
-    litellm_params = ["metadata", "acompletion", "caching", "return_async", "mock_response", "api_key", "api_version", "api_base", "force_timeout", "logger_fn", "verbose", "custom_llm_provider", "litellm_logging_obj", "litellm_call_id", "use_client", "id", "metadata", "fallbacks", "azure"]
+    litellm_params = ["metadata", "acompletion", "caching", "return_async", "mock_response", "api_key", "api_version", "api_base", "force_timeout", "logger_fn", "verbose", "custom_llm_provider", "litellm_logging_obj", "litellm_call_id", "use_client", "id", "fallbacks", "azure", "headers"]
     default_params = openai_params + litellm_params
     non_default_params = {k: v for k,v in kwargs.items() if k not in default_params} # model-specific params - pass them straight to the model/provider
     if mock_response:
@@ -775,10 +776,16 @@ def completion(
             or os.environ.get("HUGGINGFACE_API_KEY")
             or litellm.api_key
         )
+        hf_headers = (
+            headers
+            or litellm.headers
+        )
+        print(f'headers before hf rest api: {hf_headers}')
         model_response = huggingface_restapi.completion(
             model=model,
             messages=messages,
             api_base=api_base, # type: ignore
+            headers=hf_headers,
             model_response=model_response,
             print_verbose=print_verbose,
             optional_params=optional_params,
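A hedged end-to-end sketch of the new kwarg from the caller's side (endpoint URL and token are placeholders; the model name is borrowed from the CLI help text later in this diff):

import litellm

response = litellm.completion(
    model="huggingface/codellama/CodeLlama-7b-Instruct-hf",
    messages=[{"role": "user", "content": "def fib(n):"}],
    api_base="https://my-endpoint.endpoints.huggingface.cloud",  # placeholder Inference Endpoint
    headers={"Authorization": "Bearer hf_xxx"},  # forwarded to the Huggingface request as hf_headers
)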
File diff suppressed because it is too large
@@ -88,13 +88,15 @@ def is_port_in_use(port):
 @click.option('--port', default=8000, help='Port to bind the server to.')
 @click.option('--api_base', default=None, help='API base URL.')
 @click.option('--model', default=None, help='The model name to pass to litellm expects')
+@click.option('--alias', default=None, help='The alias for the model - use this to give a litellm model name (e.g. "huggingface/codellama/CodeLlama-7b-Instruct-hf") a more user-friendly name ("codellama")')
 @click.option('--add_key', default=None, help='The model name to pass to litellm expects')
+@click.option('--headers', default=None, help='headers for the API call')
 @click.option('--deploy', is_flag=True, type=bool, help='Get a deployed proxy endpoint - api.litellm.ai')
+@click.option('--save', is_flag=True, type=bool, help='Save the model-specific config')
 @click.option('--debug', default=False, is_flag=True, type=bool, help='To debug the input')
 @click.option('--temperature', default=None, type=float, help='Set temperature for the model')
 @click.option('--max_tokens', default=None, type=int, help='Set max tokens for the model')
 @click.option('--drop_params', is_flag=True, help='Drop any unmapped params')
-@click.option('--save', is_flag=True, help='Save params to config, to persist across restarts')
 @click.option('--create_proxy', is_flag=True, help='Creates a local OpenAI-compatible server template')
 @click.option('--add_function_to_prompt', is_flag=True, help='If function passed but unsupported, pass it as prompt')
 @click.option('--config', '-c', is_flag=True, help='Configure Litellm')
@@ -105,7 +107,7 @@ def is_port_in_use(port):
 @click.option('--test', flag_value=True, help='proxy chat completions url to make a test request to')
 @click.option('--local', is_flag=True, default=False, help='for local debugging')
 @click.option('--cost', is_flag=True, default=False, help='for viewing cost logs')
-def run_server(host, port, api_base, model, add_key, deploy, debug, temperature, max_tokens, drop_params, create_proxy, add_function_to_prompt, config, file, max_budget, telemetry, logs, test, local, cost, save):
+def run_server(host, port, api_base, model, alias, add_key, headers, deploy, save, debug, temperature, max_tokens, drop_params, create_proxy, add_function_to_prompt, config, file, max_budget, telemetry, logs, test, local, cost):
     global feature_telemetry
     args = locals()
     if local:
@@ -133,19 +135,22 @@ def run_server(host, port, api_base, model, add_key, deploy, debug, temperature,
     if logs is not None:
         if logs == 0: # default to 1
             logs = 1
-        with open('api_log.json') as f:
-            data = json.load(f)
+        try:
+            with open('api_log.json') as f:
+                data = json.load(f)
 
-        # convert keys to datetime objects
-        log_times = {datetime.strptime(k, "%Y%m%d%H%M%S%f"): v for k, v in data.items()}
+            # convert keys to datetime objects
+            log_times = {datetime.strptime(k, "%Y%m%d%H%M%S%f"): v for k, v in data.items()}
 
-        # sort by timestamp
-        sorted_times = sorted(log_times.items(), key=operator.itemgetter(0), reverse=True)
+            # sort by timestamp
+            sorted_times = sorted(log_times.items(), key=operator.itemgetter(0), reverse=True)
 
-        # get n recent logs
-        recent_logs = {k.strftime("%Y%m%d%H%M%S%f"): v for k, v in sorted_times[:logs]}
+            # get n recent logs
+            recent_logs = {k.strftime("%Y%m%d%H%M%S%f"): v for k, v in sorted_times[:logs]}
 
-        print(json.dumps(recent_logs, indent=4))
+            print(json.dumps(recent_logs, indent=4))
+        except:
+            print("LiteLLM: No logs saved!")
         return
     if add_key:
         key_name, key_value = add_key.split("=")
@@ -200,7 +205,9 @@ def run_server(host, port, api_base, model, add_key, deploy, debug, temperature,
                 click.echo(f'LiteLLM: streaming response from proxy {chunk}')
         return
     else:
-        initialize(model, api_base, debug, temperature, max_tokens, max_budget, telemetry, drop_params, add_function_to_prompt)
+        if headers:
+            headers = json.loads(headers)
+        initialize(model=model, alias=alias, api_base=api_base, debug=debug, temperature=temperature, max_tokens=max_tokens, max_budget=max_budget, telemetry=telemetry, drop_params=drop_params, add_function_to_prompt=add_function_to_prompt, headers=headers, save=save)
         try:
             import uvicorn
         except:
@@ -11,15 +11,17 @@ try:
     import fastapi
     import tomli as tomllib
     import appdirs
+    import tomli_w
 except ImportError:
     import subprocess
     import sys
 
-    subprocess.check_call([sys.executable, "-m", "pip", "install", "uvicorn", "fastapi", "tomli", "appdirs"])
+    subprocess.check_call([sys.executable, "-m", "pip", "install", "uvicorn", "fastapi", "tomli", "appdirs", "tomli-w"])
     import uvicorn
     import fastapi
     import tomli as tomllib
     import appdirs
+    import tomli_w
 
 
 import random
@@ -88,6 +90,7 @@ user_max_tokens = None
 user_temperature = None
 user_telemetry = True
 user_config = None
+user_headers = None
 config_filename = "litellm.secrets.toml"
 config_dir = os.getcwd()
 config_dir = appdirs.user_config_dir("litellm")
@@ -120,12 +123,41 @@ def add_keys_to_config(key, value):
     config.setdefault('keys', {})[key] = value
 
     # Write config to file
-    with open(user_config_path, 'w') as f:
-        for section, data in config.items():
-            f.write('[%s]\n' % section)
-            for k, v in data.items():
-                f.write('%s = "%s"\n' % (k, v))
+    with open(user_config_path, 'wb') as f:
+        tomli_w.dump(config, f)
+
+def save_params_to_config(data: dict):
+    # Check if file exists
+    if os.path.exists(user_config_path):
+        # Load existing file
+        with open(user_config_path, "rb") as f:
+            config = tomllib.load(f)
+    else:
+        # File doesn't exist, create empty config
+        config = {}
+
+    config.setdefault('general', {})
+
+    ## general config
+    general_settings = data["general"]
+
+    for key, value in general_settings.items():
+        config["general"][key] = value
+
+    ## model-specific config
+    config.setdefault("model", {})
+    config["model"].setdefault(user_model, {})
+
+    user_model_config = data[user_model]
+    model_key = model_key = user_model_config.pop("alias", user_model)
+    config["model"].setdefault(model_key, {})
+    for key, value in user_model_config.items():
+        config["model"][model_key][key] = value
+
+    # Write config to file
+    with open(user_config_path, 'wb') as f:
+        tomli_w.dump(config, f)
 
 
 def load_config():
     try:
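For reference, a small sketch (not from the diff; the values are invented) of the TOML that tomli_w now writes for a config shaped like the one save_params_to_config builds:

import tomli_w

config = {
    "general": {"max_budget": 100},
    "model": {"codellama": {"api_base": "https://my-endpoint.example", "max_tokens": 256}},
}
print(tomli_w.dumps(config))
# roughly:
# [general]
# max_budget = 100
#
# [model.codellama]
# api_base = "https://my-endpoint.example"
# max_tokens = 256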
@@ -138,7 +170,6 @@ def load_config():
         if "keys" in user_config:
             for key in user_config["keys"]:
                 os.environ[key] = user_config["keys"][key] # litellm can read keys from the environment
-
         ## settings
         if "general" in user_config:
             litellm.add_function_to_prompt = user_config["general"].get("add_function_to_prompt", True) # by default add function to prompt if unsupported by provider
@@ -191,24 +222,42 @@ def load_config():
     except Exception as e:
         pass
 
-def initialize(model, api_base, debug, temperature, max_tokens, max_budget, telemetry, drop_params, add_function_to_prompt):
-    global user_model, user_api_base, user_debug, user_max_tokens, user_temperature, user_telemetry
+def initialize(model, alias, api_base, debug, temperature, max_tokens, max_budget, telemetry, drop_params, add_function_to_prompt, headers, save):
+    global user_model, user_api_base, user_debug, user_max_tokens, user_temperature, user_telemetry, user_headers
     user_model = model
     user_debug = debug
 
     load_config()
-    user_api_base = api_base
-    user_max_tokens = max_tokens
-    user_temperature = temperature
+    dynamic_config = {"general": {}, user_model: {}}
+    if headers: # model-specific param
+        user_headers = headers
+        dynamic_config[user_model]["headers"] = headers
+    if api_base: # model-specific param
+        user_api_base = api_base
+        dynamic_config[user_model]["api_base"] = api_base
+    if max_tokens: # model-specific param
+        user_max_tokens = max_tokens
+        dynamic_config[user_model]["max_tokens"] = max_tokens
+    if temperature: # model-specific param
+        user_temperature = temperature
+        dynamic_config[user_model]["temperature"] = temperature
+    if alias: # model-specific param
+        dynamic_config[user_model]["alias"] = alias
+    if drop_params == True: # litellm-specific param
+        litellm.drop_params = True
+        dynamic_config["general"]["drop_params"] = True
+    if add_function_to_prompt == True: # litellm-specific param
+        litellm.add_function_to_prompt = True
+        dynamic_config["general"]["add_function_to_prompt"] = True
+    if max_budget: # litellm-specific param
+        litellm.max_budget = max_budget
+        dynamic_config["general"]["max_budget"] = max_budget
+    if save:
+        save_params_to_config(dynamic_config)
+        with open(user_config_path) as f:
+            print(f.read())
+        print("\033[1;32mDone successfully\033[0m")
     user_telemetry = telemetry
     usage_telemetry(feature="local_proxy_server")
-    if drop_params == True:
-        litellm.drop_params = True
-    if add_function_to_prompt == True:
-        litellm.add_function_to_prompt = True
-    if max_budget:
-        litellm.max_budget = max_budget
 
 
 def deploy_proxy(model, api_base, debug, temperature, max_tokens, telemetry, deploy):
     import requests
@@ -354,9 +403,12 @@ def logger(
             existing_data = {}
 
         existing_data.update(log_data)
-
-        with open(log_file, 'w') as f:
-            json.dump(existing_data, f, indent=2)
+        def write_to_log():
+            with open(log_file, 'w') as f:
+                json.dump(existing_data, f, indent=2)
+
+        thread = threading.Thread(target=write_to_log, daemon=True)
+        thread.start()
     elif log_event_type == 'post_api_call':
         if "stream" not in kwargs["optional_params"] or kwargs["optional_params"]["stream"] is False or kwargs.get("complete_streaming_response", False):
             inference_params = copy.deepcopy(kwargs)
@@ -367,9 +419,13 @@ def logger(
                     existing_data = json.load(f)
 
                 existing_data[dt_key]['post_api_call'] = inference_params
 
-                with open(log_file, 'w') as f:
-                    json.dump(existing_data, f, indent=2)
 
+                def write_to_log():
+                    with open(log_file, 'w') as f:
+                        json.dump(existing_data, f, indent=2)
+
+                thread = threading.Thread(target=write_to_log, daemon=True)
+                thread.start()
     except:
         traceback.print_exc()
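The pattern used in both logger branches above, shown in isolation (the payload is made up): offload the blocking json.dump to a daemon thread so the request path is not held up. Note that with daemon=True a write still in flight when the interpreter exits may be dropped.

import json
import threading

def write_async(log_file, payload):
    def write_to_log():
        with open(log_file, 'w') as f:
            json.dump(payload, f, indent=2)
    thread = threading.Thread(target=write_to_log, daemon=True)
    thread.start()
    return thread

write_async("api_log.json", {"20231014000000000000": {"pre_api_call": {"model": "example-model"}}})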
@@ -388,6 +444,8 @@ def litellm_completion(data, type):
         data["max_tokens"] = user_max_tokens
     if user_api_base:
         data["api_base"] = user_api_base
+    if user_headers:
+        data["headers"] = user_headers
     if type == "completion":
         response = litellm.text_completion(**data)
     elif type == "chat_completion":
@@ -397,6 +455,7 @@ def litellm_completion(data, type):
         print_verbose(f"response: {response}")
         return response
     except Exception as e:
+        traceback.print_exc()
         if "Invalid response object from API" in str(e):
             completion_call_details = {}
             if user_model:
@@ -3207,28 +3207,32 @@ class CustomStreamWrapper:
         return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}
 
     def handle_huggingface_chunk(self, chunk):
-        chunk = chunk.decode("utf-8")
-        text = ""
-        is_finished = False
-        finish_reason = ""
-        print_verbose(f"chunk: {chunk}")
-        if chunk.startswith("data:"):
-            data_json = json.loads(chunk[5:])
-            print_verbose(f"data json: {data_json}")
-            if "token" in data_json and "text" in data_json["token"]:
-                text = data_json["token"]["text"]
-            if data_json.get("details", False) and data_json["details"].get("finish_reason", False):
-                is_finished = True
-                finish_reason = data_json["details"]["finish_reason"]
-            elif data_json.get("generated_text", False): # if full generated text exists, then stream is complete
-                text = "" # don't return the final bos token
-                is_finished = True
-                finish_reason = "stop"
+        try:
+            chunk = chunk.decode("utf-8")
+            text = ""
+            is_finished = False
+            finish_reason = ""
+            print_verbose(f"chunk: {chunk}")
+            if chunk.startswith("data:"):
+                data_json = json.loads(chunk[5:])
+                print_verbose(f"data json: {data_json}")
+                if "token" in data_json and "text" in data_json["token"]:
+                    text = data_json["token"]["text"]
+                if data_json.get("details", False) and data_json["details"].get("finish_reason", False):
+                    is_finished = True
+                    finish_reason = data_json["details"]["finish_reason"]
+                elif data_json.get("generated_text", False): # if full generated text exists, then stream is complete
+                    text = "" # don't return the final bos token
+                    is_finished = True
+                    finish_reason = "stop"
 
-            return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}
-        elif "error" in chunk:
-            raise ValueError(chunk)
-        return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}
+                return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}
+            elif "error" in chunk:
+                raise ValueError(chunk)
+            return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}
+        except Exception as e:
+            traceback.print_exc()
+            # raise(e)
 
     def handle_ai21_chunk(self, chunk): # fake streaming
         chunk = chunk.decode("utf-8")
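To make the wrapped parser's input concrete, a hedged sketch with fabricated TGI-style SSE lines shaped like the payloads handle_huggingface_chunk expects ("data:" prefix, token.text, details.finish_reason); this is a simplified illustration, not the handler itself.

import json

sample_chunks = [
    b'data:{"token": {"text": "Hello"}}',
    b'data:{"token": {"text": " world"}, "generated_text": "Hello world", "details": {"finish_reason": "length"}}',
]
for raw in sample_chunks:
    chunk = raw.decode("utf-8")
    data_json = json.loads(chunk[5:])  # strip the "data:" prefix, as above
    text = data_json.get("token", {}).get("text", "")
    finish_reason = data_json.get("details", {}).get("finish_reason", "")
    print(repr(text), finish_reason or "not finished")
# 'Hello' not finished
# ' world' length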
poetry.lock (generated): 2 lines changed
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand.
 
 [[package]]
 name = "aiohttp"
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.8.4"
+version = "0.8.5"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"
@@ -26,7 +26,7 @@ requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"
 
 [tool.commitizen]
-version = "0.8.4"
+version = "0.8.5"
 version_files = [
     "pyproject.toml:^version"
 ]