forked from phoenix/litellm-mirror
improvements to proxy cli and finish reason mapping for anthropic
parent 0cc34d6543
commit 3ca79a88bb
8 changed files with 84 additions and 25 deletions
Binary file not shown.
@@ -217,5 +217,4 @@ def prompt_factory(model: str, messages: list):
         else:
             return hf_chat_template(original_model_name, messages)
     except:
-        traceback.print_exc()
         return default_pt(messages=messages) # default that covers Bloom, T-5, any non-chat tuned model (e.g. base Llama2)

@@ -1,9 +1,55 @@
 import click
 import subprocess
-import os
+import os, appdirs
 from dotenv import load_dotenv
 
 load_dotenv()
+from importlib import resources
+import shutil
+
+config_filename = ".env.litellm"
+
+# Using appdirs to determine user-specific config path
+config_dir = appdirs.user_config_dir("litellm")
+user_config_path = os.path.join(config_dir, config_filename)
+
+def load_config():
+    try:
+        if not os.path.exists(user_config_path):
+            # If user's config doesn't exist, copy the default config from the package
+            here = os.path.abspath(os.path.dirname(__file__))
+            parent_dir = os.path.dirname(here)
+            default_config_path = os.path.join(parent_dir, '.env.template')
+            # Ensure the user-specific directory exists
+            os.makedirs(config_dir, exist_ok=True)
+            # Copying the file using shutil.copy
+            shutil.copy(default_config_path, user_config_path)
+
+        # As the .env file is typically much simpler in structure, we use load_dotenv here directly
+        load_dotenv(dotenv_path=user_config_path)
+    except:
+        pass
+
+def open_config():
+    # Create the .env file if it doesn't exist
+    if not os.path.exists(user_config_path):
+        # If user's env doesn't exist, copy the default env from the package
+        here = os.path.abspath(os.path.dirname(__file__))
+        parent_dir = os.path.dirname(here)
+        default_env_path = os.path.join(parent_dir, '.env.template')
+        # Ensure the user-specific directory exists
+        os.makedirs(config_dir, exist_ok=True)
+        # Copying the file using shutil.copy
+        try:
+            shutil.copy(default_env_path, user_config_path)
+        except Exception as e:
+            print(f"Failed to copy .env.template: {e}")
+
+    # Open the .env file in the default editor
+    if os.name == 'nt': # For Windows
+        os.startfile(user_config_path)
+    elif os.name == 'posix': # For MacOS, Linux, and anything using Bash
+        subprocess.call(('open', '-t', user_config_path))
+
 @click.command()
 @click.option('--port', default=8000, help='Port to bind the server to.')

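For context, a minimal sketch (not part of the diff) of what the appdirs lookup above resolves to. It assumes appdirs 1.4.4 is installed; the paths in the comments are typical platform defaults, not guaranteed values.

import os
import appdirs

# Same call the new CLI module makes; the result is platform-specific,
# e.g. ~/.config/litellm on Linux or ~/Library/Application Support/litellm on macOS.
config_dir = appdirs.user_config_dir("litellm")
user_config_path = os.path.join(config_dir, ".env.litellm")
print(user_config_path)
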
@@ -16,22 +62,17 @@ load_dotenv()
 @click.option('--telemetry', default=True, type=bool, help='Helps us know if people are using this feature. Turn this off by doing `--telemetry False`')
 @click.option('--config', is_flag=True, help='Create and open .env file from .env.template')
 @click.option('--test', default=None, help='proxy chat completions url to make a test request to')
-def run_server(port, api_base, model, deploy, debug, temperature, max_tokens, telemetry, config, test):
+@click.option('--local', is_flag=True, default=False, help='for local debugging')
+def run_server(port, api_base, model, deploy, debug, temperature, max_tokens, telemetry, config, test, local):
     if config:
-        if os.path.exists('.env.template'):
-            if not os.path.exists('.env'):
-                with open('.env.template', 'r') as source:
-                    data = source.read()
-                with open('.env', 'w') as destination:
-                    destination.write(data)
-
-            click.echo('Opening .env file...')
-            subprocess.call(['open', '.env']) # replace `open` with `start` on Windows
-        else:
-            click.echo('No .env.template file found.')
-
-    from .proxy_server import app, initialize, deploy_proxy
+        open_config()
 
+    if local:
+        from proxy_server import app, initialize, deploy_proxy
+        debug = True
+    else:
+        from .proxy_server import app, initialize, deploy_proxy
+        # from proxy_server import app, initialize, deploy_proxy
 
     if deploy == True:
         print(f"\033[32mLiteLLM: Deploying your proxy to api.litellm.ai\033[0m\n")
         print(f"\033[32mLiteLLM: Deploying proxy for model: {model}\033[0m\n")

@@ -57,6 +98,7 @@ def run_server(port, api_base, model, deploy, debug, temperature, max_tokens, te
         click.echo(f'LiteLLM: response from proxy {response}')
         return
     else:
+        load_config()
         initialize(model, api_base, debug, temperature, max_tokens, telemetry)
 

@@ -120,8 +120,7 @@ def model_list():
 async def completion(request: Request):
     data = await request.json()
     print_verbose(f"data passed in: {data}")
-    if (user_model is None):
-        raise ValueError("Proxy model needs to be set")
-    data["model"] = user_model
+    if user_model:
+        data["model"] = user_model
     if user_api_base:
         data["api_base"] = user_api_base

@@ -154,8 +153,7 @@ async def completion(request: Request):
 async def chat_completion(request: Request):
     data = await request.json()
     print_verbose(f"data passed in: {data}")
-    if (user_model is None):
-        raise ValueError("Proxy model needs to be set")
-    data["model"] = user_model
+    if user_model:
+        data["model"] = user_model
     # override with user settings
     if user_temperature:

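A standalone illustration (not from the repo) of the new behavior in the two handlers above: instead of raising when no proxy model is configured, the request payload is only rewritten when `user_model` / `user_api_base` were set at startup. The helper name below is hypothetical and exists only for this sketch.

def apply_proxy_overrides(data: dict, user_model=None, user_api_base=None) -> dict:
    # Mirrors the changed handler logic: override only when a proxy-level value exists.
    if user_model:
        data["model"] = user_model
    if user_api_base:
        data["api_base"] = user_api_base
    return data

print(apply_proxy_overrides({"model": "gpt-3.5-turbo", "messages": []}, user_model="claude-2"))
# {'model': 'claude-2', 'messages': []}
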
@@ -186,7 +184,6 @@ async def chat_completion(request: Request):
     )
     response = litellm.completion(**data)
     if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
-        print("reaches stream")
         return StreamingResponse(data_generator(response), media_type='text/event-stream')
     print_verbose(f"response: {response}")
     return response

@@ -94,6 +94,14 @@ last_fetched_at_keys = None
 def _generate_id(): # private helper function
     return 'chatcmpl-' + str(uuid.uuid4())
 
+def map_finish_reason(finish_reason: str): # openai supports 5 stop sequences - 'stop', 'length', 'function_call', 'content_filter', 'null'
+    # anthropic mapping
+    print(f"receives finish reason: {finish_reason}")
+    if finish_reason == "stop_sequence":
+        return "stop"
+    return finish_reason
+
+
 class Message(OpenAIObject):
     def __init__(self, content="default", role="assistant", logprobs=None, **params):
         super(Message, self).__init__(**params)

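A quick check of what the new mapping does (the function body is copied from the hunk above, minus the debug print): Anthropic's "stop_sequence" is normalized to OpenAI's "stop", and values that are already OpenAI-compatible pass through unchanged.

def map_finish_reason(finish_reason: str):
    # anthropic mapping
    if finish_reason == "stop_sequence":
        return "stop"
    return finish_reason

print(map_finish_reason("stop_sequence"))  # stop
print(map_finish_reason("length"))         # length
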
@@ -114,7 +122,7 @@ class Choices(OpenAIObject):
     def __init__(self, finish_reason=None, index=0, message=None, **params):
         super(Choices, self).__init__(**params)
         if finish_reason:
-            self.finish_reason = finish_reason
+            self.finish_reason = map_finish_reason(finish_reason)
         else:
             self.finish_reason = "stop"
         self.index = index

@@ -3200,6 +3208,7 @@ class CustomStreamWrapper:
                 model_response.choices[0].delta = Delta(**completion_obj)
                 return model_response
             elif model_response.choices[0].finish_reason:
+                model_response.choices[0].finish_reason = map_finish_reason(model_response.choices[0].finish_reason) # ensure consistent output to openai
                 return model_response
         except StopIteration:
             raise StopIteration

poetry.lock (generated file, 13 changed lines)
@@ -122,6 +122,17 @@ files = [
 [package.dependencies]
 frozenlist = ">=1.1.0"
 
+[[package]]
+name = "appdirs"
+version = "1.4.4"
+description = "A small Python module for determining appropriate platform-specific dirs, e.g. a \"user data dir\"."
+optional = false
+python-versions = "*"
+files = [
+    {file = "appdirs-1.4.4-py2.py3-none-any.whl", hash = "sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128"},
+    {file = "appdirs-1.4.4.tar.gz", hash = "sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41"},
+]
+
 [[package]]
 name = "async-timeout"
 version = "4.0.3"

@@ -1074,4 +1085,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.8"
-content-hash = "0fa234d1342838a6cc444dd996dbe404ca2cd6c872dcf560dbe420a2956aaecd"
+content-hash = "c8cae152cee4eda56560529476234bc5e91171c6207641af797e7bebf720a499"

@@ -14,6 +14,7 @@ tiktoken = ">=0.4.0"
 importlib-metadata = ">=6.8.0"
 tokenizers = "*"
 click = "*"
+appdirs = "^1.4.4"
 
 [tool.poetry.scripts]
 litellm = 'litellm:run_server'