forked from phoenix/litellm-mirror
bump: version 0.8.4 → 0.8.5
This commit is contained in:
parent 80c60e71c1
commit 7358d2e4ea
11 changed files with 228 additions and 7343 deletions
Binary file not shown.
Binary file not shown.
@@ -6,7 +6,7 @@ import requests
 import time
 import litellm
 from typing import Callable
-from litellm.utils import ModelResponse, Choices, Message
+from litellm.utils import ModelResponse, Choices, Message, CustomStreamWrapper
 from typing import Optional
 from .prompt_templates.factory import prompt_factory, custom_prompt
 
@@ -65,12 +65,17 @@ class HuggingfaceConfig():
            and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType, classmethod, staticmethod))
            and v is not None}
 
-def validate_environment(api_key):
-    headers = {
+def validate_environment(api_key, headers):
+    default_headers = {
         "content-type": "application/json",
     }
-    if api_key:
-        headers["Authorization"] = f"Bearer {api_key}"
+    if api_key and headers is None:
+        default_headers["Authorization"] = f"Bearer {api_key}" # Huggingface Inference Endpoint default is to accept bearer tokens
+        headers = default_headers
+    elif headers:
+        headers=headers
+    else:
+        headers = default_headers
     return headers
 
 tgi_models_cache = None
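For reference, the new header resolution can be exercised on its own. A minimal sketch, assuming the handler module is importable as litellm.llms.huggingface_restapi (the key and header values are placeholders):

    from litellm.llms.huggingface_restapi import validate_environment  # assumed module path

    # No caller headers: the api key becomes a Bearer token on top of the defaults.
    print(validate_environment(api_key="hf_xxx", headers=None))
    # {'content-type': 'application/json', 'Authorization': 'Bearer hf_xxx'}

    # Caller-supplied headers win and are returned untouched (no Bearer token is added).
    print(validate_environment(api_key="hf_xxx", headers={"Authorization": "Basic dXNlcjpwYXNz"}))
    # {'Authorization': 'Basic dXNlcjpwYXNz'}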
@@ -125,6 +130,7 @@ def completion(
     model: str,
     messages: list,
     api_base: Optional[str],
+    headers: Optional[dict],
     model_response: ModelResponse,
     print_verbose: Callable,
     encoding,
@@ -135,7 +141,8 @@ def completion(
     litellm_params=None,
     logger_fn=None,
 ):
-    headers = validate_environment(api_key)
+    print(f'headers inside hf rest api: {headers}')
+    headers = validate_environment(api_key, headers)
     task = get_hf_task_for_model(model)
     print_verbose(f"{model}, {task}")
     completion_url = ""
@@ -227,7 +234,7 @@ def completion(
     logging_obj.pre_call(
         input=input_text,
         api_key=api_key,
-        additional_args={"complete_input_dict": data, "task": task},
+        additional_args={"complete_input_dict": data, "task": task, "headers": headers},
     )
     ## COMPLETION CALL
     if "stream" in optional_params and optional_params["stream"] == True:
@@ -244,20 +251,43 @@ def completion(
                 headers=headers,
                 data=json.dumps(data)
             )
-            ## LOGGING
-            logging_obj.post_call(
-                input=input_text,
-                api_key=api_key,
-                original_response=response.text,
-                additional_args={"complete_input_dict": data, "task": task},
-            )
-            ## RESPONSE OBJECT
-            try:
-                completion_response = response.json()
-            except:
-                raise HuggingfaceError(
-                    message=response.text, status_code=response.status_code
-                )
+            ## Some servers might return streaming responses even though stream was not set to true. (e.g. Baseten)
+            is_streamed = False
+            print(f"response keys: {response.__dict__.keys()}")
+            print(f"response keys: {response.__dict__['headers']}")
+            if response.__dict__['headers']["Content-Type"] == "text/event-stream":
+                is_streamed = True
+
+            # iterate over the complete streamed response, and return the final answer
+            if is_streamed:
+                streamed_response = CustomStreamWrapper(completion_stream=response.iter_lines(), model=model, custom_llm_provider="huggingface", logging_obj=logging_obj)
+                content = ""
+                for chunk in streamed_response:
+                    content += chunk["choices"][0]["delta"]["content"]
+                completion_response = [{"generated_text": content}]
+                ## LOGGING
+                logging_obj.post_call(
+                    input=input_text,
+                    api_key=api_key,
+                    original_response=completion_response,
+                    additional_args={"complete_input_dict": data, "task": task},
+                )
+            else:
+                ## LOGGING
+                logging_obj.post_call(
+                    input=input_text,
+                    api_key=api_key,
+                    original_response=response.text,
+                    additional_args={"complete_input_dict": data, "task": task},
+                )
+                ## RESPONSE OBJECT
+                try:
+                    completion_response = response.json()
+                except:
+                    raise HuggingfaceError(
+                        message=response.text, status_code=response.status_code
+                    )
         print_verbose(f"response: {completion_response}")
         if isinstance(completion_response, dict) and "error" in completion_response:
             print_verbose(f"completion error: {completion_response['error']}")
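The non-streaming branch above now sniffs the response's Content-Type to catch servers that stream even when stream was not requested. A standalone sketch of the same check with plain requests (URL and payload are placeholders):

    import requests

    resp = requests.post("https://example.com/generate", json={"inputs": "Hello"}, stream=True)

    if resp.headers.get("Content-Type", "") == "text/event-stream":
        # the server streamed anyway: consume it line by line
        for line in resp.iter_lines():
            print(line)
    else:
        print(resp.json())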
@@ -22,7 +22,9 @@ def llama_2_chat_pt(messages):
             "post_message": "\n" # follows this - https://replicate.com/blog/how-to-prompt-llama
             }
         },
-        messages=messages
+        messages=messages,
+        bos_token="<s>",
+        eos_token="</s>"
     )
     return prompt
 
@@ -218,14 +220,26 @@ def function_call_prompt(messages: list, functions: list):
 
 
 # Custom prompt template
-def custom_prompt(role_dict: dict, messages: list, initial_prompt_value: str="", final_prompt_value: str=""):
-    prompt = initial_prompt_value
+def custom_prompt(role_dict: dict, messages: list, initial_prompt_value: str="", final_prompt_value: str="", bos_token: str="", eos_token: str=""):
+    prompt = bos_token + initial_prompt_value
+    bos_open = True
+    ## a bos token is at the start of a system / human message
+    ## an eos token is at the end of the assistant response to the message
     for message in messages:
         role = message["role"]
+
+        if role in ["system", "human"] and not bos_open:
+            prompt += bos_token
+            bos_open = True
+
         pre_message_str = role_dict[role]["pre_message"] if role in role_dict and "pre_message" in role_dict[role] else ""
         post_message_str = role_dict[role]["post_message"] if role in role_dict and "post_message" in role_dict[role] else ""
         prompt += pre_message_str + message["content"] + post_message_str
+
+        if role == "assistant":
+            prompt += eos_token
+            bos_open = False
+
     prompt += final_prompt_value
     return prompt
 
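To see what the new bos/eos handling produces, here is a small usage sketch, assuming the factory is importable as litellm.llms.prompt_templates.factory (the role_dict and messages are illustrative):

    from litellm.llms.prompt_templates.factory import custom_prompt  # assumed module path

    prompt = custom_prompt(
        role_dict={
            "system": {"pre_message": "[INST] <<SYS>>\n", "post_message": "\n<</SYS>>\n"},
            "user": {"pre_message": "", "post_message": " [/INST]\n"},
            "assistant": {"post_message": "\n"},
        },
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Hi!"},
            {"role": "assistant", "content": "Hello!"},
        ],
        bos_token="<s>",
        eos_token="</s>",
    )
    # The prompt opens with "<s>" and the assistant turn is closed with "</s>";
    # note that only a later "system" or "human" message re-opens a new "<s>" segment.
    print(prompt)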
@@ -230,9 +230,10 @@ def completion(
     id = kwargs.get('id', None)
     metadata = kwargs.get('metadata', None)
     fallbacks = kwargs.get('fallbacks', None)
+    headers = kwargs.get("headers", None)
     ######## end of unpacking kwargs ###########
     openai_params = ["functions", "function_call", "temperature", "temperature", "top_p", "n", "stream", "stop", "max_tokens", "presence_penalty", "frequency_penalty", "logit_bias", "user", "request_timeout", "api_base", "api_version", "api_key"]
-    litellm_params = ["metadata", "acompletion", "caching", "return_async", "mock_response", "api_key", "api_version", "api_base", "force_timeout", "logger_fn", "verbose", "custom_llm_provider", "litellm_logging_obj", "litellm_call_id", "use_client", "id", "metadata", "fallbacks", "azure"]
+    litellm_params = ["metadata", "acompletion", "caching", "return_async", "mock_response", "api_key", "api_version", "api_base", "force_timeout", "logger_fn", "verbose", "custom_llm_provider", "litellm_logging_obj", "litellm_call_id", "use_client", "id", "fallbacks", "azure", "headers"]
     default_params = openai_params + litellm_params
     non_default_params = {k: v for k,v in kwargs.items() if k not in default_params} # model-specific params - pass them straight to the model/provider
     if mock_response:
@@ -775,10 +776,16 @@ def completion(
             or os.environ.get("HUGGINGFACE_API_KEY")
             or litellm.api_key
         )
+        hf_headers = (
+            headers
+            or litellm.headers
+        )
+        print(f'headers before hf rest api: {hf_headers}')
         model_response = huggingface_restapi.completion(
             model=model,
             messages=messages,
             api_base=api_base, # type: ignore
+            headers=hf_headers,
             model_response=model_response,
             print_verbose=print_verbose,
             optional_params=optional_params,
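End to end, these hunks let a headers kwarg on litellm.completion flow through to the HuggingFace handler instead of being treated as a model-specific param. A minimal call sketch (model name, endpoint URL and header value are placeholders):

    import litellm

    response = litellm.completion(
        model="huggingface/meta-llama/Llama-2-7b-chat-hf",
        messages=[{"role": "user", "content": "Hello"}],
        api_base="https://my-endpoint.example.com",
        headers={"Authorization": "Basic dXNlcjpwYXNz"},  # forwarded instead of the default Bearer token
    )
    print(response)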
File diff suppressed because it is too large.
@@ -88,13 +88,15 @@ def is_port_in_use(port):
 @click.option('--port', default=8000, help='Port to bind the server to.')
 @click.option('--api_base', default=None, help='API base URL.')
 @click.option('--model', default=None, help='The model name to pass to litellm expects')
+@click.option('--alias', default=None, help='The alias for the model - use this to give a litellm model name (e.g. "huggingface/codellama/CodeLlama-7b-Instruct-hf") a more user-friendly name ("codellama")')
 @click.option('--add_key', default=None, help='The model name to pass to litellm expects')
+@click.option('--headers', default=None, help='headers for the API call')
 @click.option('--deploy', is_flag=True, type=bool, help='Get a deployed proxy endpoint - api.litellm.ai')
+@click.option('--save', is_flag=True, type=bool, help='Save the model-specific config')
 @click.option('--debug', default=False, is_flag=True, type=bool, help='To debug the input')
 @click.option('--temperature', default=None, type=float, help='Set temperature for the model')
 @click.option('--max_tokens', default=None, type=int, help='Set max tokens for the model')
 @click.option('--drop_params', is_flag=True, help='Drop any unmapped params')
-@click.option('--save', is_flag=True, help='Save params to config, to persist across restarts')
 @click.option('--create_proxy', is_flag=True, help='Creates a local OpenAI-compatible server template')
 @click.option('--add_function_to_prompt', is_flag=True, help='If function passed but unsupported, pass it as prompt')
 @click.option('--config', '-c', is_flag=True, help='Configure Litellm')
@@ -105,7 +107,7 @@ def is_port_in_use(port):
 @click.option('--test', flag_value=True, help='proxy chat completions url to make a test request to')
 @click.option('--local', is_flag=True, default=False, help='for local debugging')
 @click.option('--cost', is_flag=True, default=False, help='for viewing cost logs')
-def run_server(host, port, api_base, model, add_key, deploy, debug, temperature, max_tokens, drop_params, create_proxy, add_function_to_prompt, config, file, max_budget, telemetry, logs, test, local, cost, save):
+def run_server(host, port, api_base, model, alias, add_key, headers, deploy, save, debug, temperature, max_tokens, drop_params, create_proxy, add_function_to_prompt, config, file, max_budget, telemetry, logs, test, local, cost):
     global feature_telemetry
     args = locals()
     if local:
@@ -133,19 +135,22 @@ def run_server(host, port, api_base, model, add_key, deploy, debug, temperature,
     if logs is not None:
         if logs == 0: # default to 1
             logs = 1
-        with open('api_log.json') as f:
-            data = json.load(f)
+        try:
+            with open('api_log.json') as f:
+                data = json.load(f)
 
             # convert keys to datetime objects
             log_times = {datetime.strptime(k, "%Y%m%d%H%M%S%f"): v for k, v in data.items()}
 
             # sort by timestamp
             sorted_times = sorted(log_times.items(), key=operator.itemgetter(0), reverse=True)
 
             # get n recent logs
             recent_logs = {k.strftime("%Y%m%d%H%M%S%f"): v for k, v in sorted_times[:logs]}
 
             print(json.dumps(recent_logs, indent=4))
+        except:
+            print("LiteLLM: No logs saved!")
         return
     if add_key:
         key_name, key_value = add_key.split("=")
@@ -200,7 +205,9 @@ def run_server(host, port, api_base, model, add_key, deploy, debug, temperature,
             click.echo(f'LiteLLM: streaming response from proxy {chunk}')
         return
     else:
-        initialize(model, api_base, debug, temperature, max_tokens, max_budget, telemetry, drop_params, add_function_to_prompt)
+        if headers:
+            headers = json.loads(headers)
+        initialize(model=model, alias=alias, api_base=api_base, debug=debug, temperature=temperature, max_tokens=max_tokens, max_budget=max_budget, telemetry=telemetry, drop_params=drop_params, add_function_to_prompt=add_function_to_prompt, headers=headers, save=save)
         try:
             import uvicorn
         except:
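The new --headers flag arrives as a JSON string and is parsed with json.loads right before initialize() is called. A small sketch of that parsing step (the flag value is a placeholder):

    import json

    raw = '{"Authorization": "Basic dXNlcjpwYXNz"}'  # what --headers '...' hands to run_server
    headers = json.loads(raw)
    print(headers["Authorization"])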
@@ -11,15 +11,17 @@ try:
     import fastapi
     import tomli as tomllib
     import appdirs
+    import tomli_w
 except ImportError:
     import subprocess
     import sys
 
-    subprocess.check_call([sys.executable, "-m", "pip", "install", "uvicorn", "fastapi", "tomli", "appdirs"])
+    subprocess.check_call([sys.executable, "-m", "pip", "install", "uvicorn", "fastapi", "tomli", "appdirs", "tomli-w"])
     import uvicorn
     import fastapi
     import tomli as tomllib
     import appdirs
+    import tomli_w
 
 
 import random
@@ -88,6 +90,7 @@ user_max_tokens = None
 user_temperature = None
 user_telemetry = True
 user_config = None
+user_headers = None
 config_filename = "litellm.secrets.toml"
 config_dir = os.getcwd()
 config_dir = appdirs.user_config_dir("litellm")
@@ -120,12 +123,41 @@ def add_keys_to_config(key, value):
     config.setdefault('keys', {})[key] = value
 
     # Write config to file
-    with open(user_config_path, 'w') as f:
-        for section, data in config.items():
-            f.write('[%s]\n' % section)
-            for k, v in data.items():
-                f.write('%s = "%s"\n' % (k, v))
+    with open(user_config_path, 'wb') as f:
+        tomli_w.dump(config, f)
+
+def save_params_to_config(data: dict):
+    # Check if file exists
+    if os.path.exists(user_config_path):
+        # Load existing file
+        with open(user_config_path, "rb") as f:
+            config = tomllib.load(f)
+    else:
+        # File doesn't exist, create empty config
+        config = {}
+
+    config.setdefault('general', {})
+
+    ## general config
+    general_settings = data["general"]
+
+    for key, value in general_settings.items():
+        config["general"][key] = value
+
+    ## model-specific config
+    config.setdefault("model", {})
+    config["model"].setdefault(user_model, {})
+
+    user_model_config = data[user_model]
+    model_key = model_key = user_model_config.pop("alias", user_model)
+    config["model"].setdefault(model_key, {})
+    for key, value in user_model_config.items():
+        config["model"][model_key][key] = value
+
+    # Write config to file
+    with open(user_config_path, 'wb') as f:
+        tomli_w.dump(config, f)
 
 
 def load_config():
     try:
@@ -138,7 +170,6 @@ def load_config():
         if "keys" in user_config:
             for key in user_config["keys"]:
                 os.environ[key] = user_config["keys"][key] # litellm can read keys from the environment
-
         ## settings
         if "general" in user_config:
             litellm.add_function_to_prompt = user_config["general"].get("add_function_to_prompt", True) # by default add function to prompt if unsupported by provider
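Both add_keys_to_config and the new save_params_to_config round-trip the TOML config with tomllib/tomli for reading and tomli_w for writing. A standalone sketch of that round-trip (the file name is a placeholder, not the real litellm.secrets.toml path):

    import tomli as tomllib  # on Python 3.11+ the stdlib tomllib works the same way
    import tomli_w

    config = {"general": {"max_budget": 100}, "model": {"codellama": {"max_tokens": 1024}}}

    with open("example-config.toml", "wb") as f:  # tomli_w writes bytes, hence 'wb'
        tomli_w.dump(config, f)

    with open("example-config.toml", "rb") as f:
        assert tomllib.load(f) == config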
@@ -191,24 +222,42 @@ def load_config():
     except Exception as e:
         pass
 
-def initialize(model, api_base, debug, temperature, max_tokens, max_budget, telemetry, drop_params, add_function_to_prompt):
-    global user_model, user_api_base, user_debug, user_max_tokens, user_temperature, user_telemetry
+def initialize(model, alias, api_base, debug, temperature, max_tokens, max_budget, telemetry, drop_params, add_function_to_prompt, headers, save):
+    global user_model, user_api_base, user_debug, user_max_tokens, user_temperature, user_telemetry, user_headers
     user_model = model
     user_debug = debug
 
     load_config()
-    user_api_base = api_base
-    user_max_tokens = max_tokens
-    user_temperature = temperature
+    dynamic_config = {"general": {}, user_model: {}}
+    if headers: # model-specific param
+        user_headers = headers
+        dynamic_config[user_model]["headers"] = headers
+    if api_base: # model-specific param
+        user_api_base = api_base
+        dynamic_config[user_model]["api_base"] = api_base
+    if max_tokens: # model-specific param
+        user_max_tokens = max_tokens
+        dynamic_config[user_model]["max_tokens"] = max_tokens
+    if temperature: # model-specific param
+        user_temperature = temperature
+        dynamic_config[user_model]["temperature"] = temperature
+    if alias: # model-specific param
+        dynamic_config[user_model]["alias"] = alias
+    if drop_params == True: # litellm-specific param
+        litellm.drop_params = True
+        dynamic_config["general"]["drop_params"] = True
+    if add_function_to_prompt == True: # litellm-specific param
+        litellm.add_function_to_prompt = True
+        dynamic_config["general"]["add_function_to_prompt"] = True
+    if max_budget: # litellm-specific param
+        litellm.max_budget = max_budget
+        dynamic_config["general"]["max_budget"] = max_budget
+    if save:
+        save_params_to_config(dynamic_config)
+        with open(user_config_path) as f:
+            print(f.read())
+        print("\033[1;32mDone successfully\033[0m")
     user_telemetry = telemetry
     usage_telemetry(feature="local_proxy_server")
-    if drop_params == True:
-        litellm.drop_params = True
-    if add_function_to_prompt == True:
-        litellm.add_function_to_prompt = True
-    if max_budget:
-        litellm.max_budget = max_budget
 
 
 def deploy_proxy(model, api_base, debug, temperature, max_tokens, telemetry, deploy):
     import requests
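For orientation, this is roughly the shape of the dynamic_config dict that initialize() assembles and hands to save_params_to_config when --save is passed (model name and values are made up):

    dynamic_config = {
        "general": {
            "drop_params": True,
            "max_budget": 100,
        },
        "huggingface/codellama/CodeLlama-7b-Instruct-hf": {
            "alias": "codellama",
            "api_base": "https://my-endpoint.example.com",
            "max_tokens": 1024,
            "headers": {"Authorization": "Basic dXNlcjpwYXNz"},
        },
    }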
@@ -354,9 +403,12 @@ def logger(
                 existing_data = {}
 
             existing_data.update(log_data)
+            def write_to_log():
                 with open(log_file, 'w') as f:
                     json.dump(existing_data, f, indent=2)
 
+            thread = threading.Thread(target=write_to_log, daemon=True)
+            thread.start()
         elif log_event_type == 'post_api_call':
             if "stream" not in kwargs["optional_params"] or kwargs["optional_params"]["stream"] is False or kwargs.get("complete_streaming_response", False):
                 inference_params = copy.deepcopy(kwargs)
@@ -367,9 +419,13 @@ def logger(
                     existing_data = json.load(f)
 
                 existing_data[dt_key]['post_api_call'] = inference_params
 
-                with open(log_file, 'w') as f:
-                    json.dump(existing_data, f, indent=2)
+                def write_to_log():
+                    with open(log_file, 'w') as f:
+                        json.dump(existing_data, f, indent=2)
+
+                thread = threading.Thread(target=write_to_log, daemon=True)
+                thread.start()
     except:
         traceback.print_exc()
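Both logger branches now hand the JSON dump to a short-lived daemon thread so the API call path is not blocked on disk I/O. A minimal standalone sketch of the pattern (file name and payload are placeholders):

    import json
    import threading

    existing_data = {"20231004120000000000": {"pre_api_call": {"model": "codellama"}}}

    def write_to_log():
        with open("api_log.example.json", "w") as f:
            json.dump(existing_data, f, indent=2)

    thread = threading.Thread(target=write_to_log, daemon=True)
    thread.start()
    thread.join()  # joined here only so the sketch finishes; the proxy fires and forgets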
@@ -388,6 +444,8 @@ def litellm_completion(data, type):
         data["max_tokens"] = user_max_tokens
     if user_api_base:
         data["api_base"] = user_api_base
+    if user_headers:
+        data["headers"] = user_headers
     if type == "completion":
         response = litellm.text_completion(**data)
     elif type == "chat_completion":
@@ -397,6 +455,7 @@ def litellm_completion(data, type):
         print_verbose(f"response: {response}")
         return response
     except Exception as e:
+        traceback.print_exc()
         if "Invalid response object from API" in str(e):
             completion_call_details = {}
             if user_model:
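Together with the headers plumbing in the completion code above, a proxy started with --headers now injects those headers into every request it forwards. A sketch of the hand-off (values are placeholders):

    data = {"model": "codellama", "messages": [{"role": "user", "content": "Hi"}]}
    user_headers = {"Authorization": "Basic dXNlcjpwYXNz"}
    if user_headers:
        data["headers"] = user_headers
    # litellm.completion(**data) then routes the headers to the provider handler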
@@ -3207,28 +3207,32 @@ class CustomStreamWrapper:
         return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}
 
     def handle_huggingface_chunk(self, chunk):
-        chunk = chunk.decode("utf-8")
-        text = ""
-        is_finished = False
-        finish_reason = ""
-        print_verbose(f"chunk: {chunk}")
-        if chunk.startswith("data:"):
-            data_json = json.loads(chunk[5:])
-            print_verbose(f"data json: {data_json}")
-            if "token" in data_json and "text" in data_json["token"]:
-                text = data_json["token"]["text"]
-            if data_json.get("details", False) and data_json["details"].get("finish_reason", False):
-                is_finished = True
-                finish_reason = data_json["details"]["finish_reason"]
-            elif data_json.get("generated_text", False): # if full generated text exists, then stream is complete
-                text = "" # don't return the final bos token
-                is_finished = True
-                finish_reason = "stop"
-            return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}
-        elif "error" in chunk:
-            raise ValueError(chunk)
-        return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}
+        try:
+            chunk = chunk.decode("utf-8")
+            text = ""
+            is_finished = False
+            finish_reason = ""
+            print_verbose(f"chunk: {chunk}")
+            if chunk.startswith("data:"):
+                data_json = json.loads(chunk[5:])
+                print_verbose(f"data json: {data_json}")
+                if "token" in data_json and "text" in data_json["token"]:
+                    text = data_json["token"]["text"]
+                if data_json.get("details", False) and data_json["details"].get("finish_reason", False):
+                    is_finished = True
+                    finish_reason = data_json["details"]["finish_reason"]
+                elif data_json.get("generated_text", False): # if full generated text exists, then stream is complete
+                    text = "" # don't return the final bos token
+                    is_finished = True
+                    finish_reason = "stop"
+
+                return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}
+            elif "error" in chunk:
+                raise ValueError(chunk)
+            return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}
+        except Exception as e:
+            traceback.print_exc()
+            # raise(e)
 
     def handle_ai21_chunk(self, chunk): # fake streaming
         chunk = chunk.decode("utf-8")
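For context, the chunks this handler sees are Server-Sent-Events lines from HuggingFace text-generation-inference; the exact field set varies by server version, but parsing follows the same data: prefix convention. A small sketch (the payload is illustrative):

    import json

    chunk = b'data:{"token": {"id": 450, "text": " Hello", "logprob": -0.1, "special": false}}'

    decoded = chunk.decode("utf-8")
    if decoded.startswith("data:"):
        data_json = json.loads(decoded[5:])
        print(data_json["token"]["text"])  # " Hello"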
poetry.lock (generated, 2 changed lines)
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand.
 
 [[package]]
 name = "aiohttp"
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.8.4"
+version = "0.8.5"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"
@@ -26,7 +26,7 @@ requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"
 
 [tool.commitizen]
-version = "0.8.4"
+version = "0.8.5"
 version_files = [
     "pyproject.toml:^version"
 ]