diff --git a/litellm/proxy/.env.template b/litellm/.env.template
similarity index 100%
rename from litellm/proxy/.env.template
rename to litellm/.env.template
diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index d3a54a2d4..a7ff83ae4 100644
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/llms/prompt_templates/factory.py b/litellm/llms/prompt_templates/factory.py
index 381e8b8fd..d24548488 100644
--- a/litellm/llms/prompt_templates/factory.py
+++ b/litellm/llms/prompt_templates/factory.py
@@ -217,5 +217,4 @@ def prompt_factory(model: str, messages: list):
         else:
             return hf_chat_template(original_model_name, messages)
     except:
-        traceback.print_exc()
         return default_pt(messages=messages) # default that covers Bloom, T-5, any non-chat tuned model (e.g. base Llama2)
\ No newline at end of file
diff --git a/litellm/proxy/proxy_cli.py b/litellm/proxy/proxy_cli.py
index dc6778e6d..28f252fe4 100644
--- a/litellm/proxy/proxy_cli.py
+++ b/litellm/proxy/proxy_cli.py
@@ -1,9 +1,55 @@
 import click
 import subprocess
-import os
+import os, appdirs
 from dotenv import load_dotenv
 
 load_dotenv()
+from importlib import resources
+import shutil
+
+config_filename = ".env.litellm"
+
+# Using appdirs to determine user-specific config path
+config_dir = appdirs.user_config_dir("litellm")
+user_config_path = os.path.join(config_dir, config_filename)
+
+def load_config():
+    try:
+        if not os.path.exists(user_config_path):
+            # If user's config doesn't exist, copy the default config from the package
+            here = os.path.abspath(os.path.dirname(__file__))
+            parent_dir = os.path.dirname(here)
+            default_config_path = os.path.join(parent_dir, '.env.template')
+            # Ensure the user-specific directory exists
+            os.makedirs(config_dir, exist_ok=True)
+            # Copying the file using shutil.copy
+            shutil.copy(default_config_path, user_config_path)
+
+        # As the .env file is typically much simpler in structure, we use load_dotenv here directly
+        load_dotenv(dotenv_path=user_config_path)
+    except:
+        pass
+
+def open_config():
+    # Create the .env file if it doesn't exist
+    if not os.path.exists(user_config_path):
+        # If user's env doesn't exist, copy the default env from the package
+        here = os.path.abspath(os.path.dirname(__file__))
+        parent_dir = os.path.dirname(here)
+        default_env_path = os.path.join(parent_dir, '.env.template')
+        # Ensure the user-specific directory exists
+        os.makedirs(config_dir, exist_ok=True)
+        # Copying the file using shutil.copy
+        try:
+            shutil.copy(default_env_path, user_config_path)
+        except Exception as e:
+            print(f"Failed to copy .env.template: {e}")
+
+    # Open the .env file in the default editor
+    if os.name == 'nt': # For Windows
+        os.startfile(user_config_path)
+    elif os.name == 'posix': # For MacOS, Linux, and anything using Bash
+        subprocess.call(('open', '-t', user_config_path))
 
 @click.command()
 @click.option('--port', default=8000, help='Port to bind the server to.')
@@ -16,22 +62,17 @@ load_dotenv()
 @click.option('--telemetry', default=True, type=bool, help='Helps us know if people are using this feature. Turn this off by doing `--telemetry False`')
 @click.option('--config', is_flag=True, help='Create and open .env file from .env.template')
 @click.option('--test', default=None, help='proxy chat completions url to make a test request to')
-def run_server(port, api_base, model, deploy, debug, temperature, max_tokens, telemetry, config, test):
+@click.option('--local', is_flag=True, default=False, help='for local debugging')
+def run_server(port, api_base, model, deploy, debug, temperature, max_tokens, telemetry, config, test, local):
     if config:
-        if os.path.exists('.env.template'):
-            if not os.path.exists('.env'):
-                with open('.env.template', 'r') as source:
-                    data = source.read()
-                with open('.env', 'w') as destination:
-                    destination.write(data)
-
-                click.echo('Opening .env file...')
-                subprocess.call(['open', '.env']) # replace `open` with `start` on Windows
-        else:
-            click.echo('No .env.template file found.')
+        open_config()
 
-    from .proxy_server import app, initialize, deploy_proxy
-    # from proxy_server import app, initialize, deploy_proxy
+    if local:
+        from proxy_server import app, initialize, deploy_proxy
+        debug = True
+    else:
+        from .proxy_server import app, initialize, deploy_proxy
+
     if deploy == True:
         print(f"\033[32mLiteLLM: Deploying your proxy to api.litellm.ai\033[0m\n")
         print(f"\033[32mLiteLLM: Deploying proxy for model: {model}\033[0m\n")
@@ -57,6 +98,7 @@ def run_server(port, api_base, model, deploy, debug, temperature, max_tokens, te
         click.echo(f'LiteLLM: response from proxy {response}')
         return
     else:
+        load_config()
         initialize(model, api_base, debug, temperature, max_tokens, telemetry)
 
 
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 313ca43bd..5f771d489 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -120,9 +120,8 @@ def model_list():
 async def completion(request: Request):
     data = await request.json()
     print_verbose(f"data passed in: {data}")
-    if (user_model is None):
-        raise ValueError("Proxy model needs to be set")
-    data["model"] = user_model
+    if user_model:
+        data["model"] = user_model
     if user_api_base:
         data["api_base"] = user_api_base
     ## check for custom prompt template ##
@@ -154,9 +153,8 @@ async def chat_completion(request: Request):
     data = await request.json()
     print_verbose(f"data passed in: {data}")
-    if (user_model is None):
-        raise ValueError("Proxy model needs to be set")
-    data["model"] = user_model
+    if user_model:
+        data["model"] = user_model
     # override with user settings
     if user_temperature:
         data["temperature"] = user_temperature
@@ -186,7 +184,6 @@ async def chat_completion(request: Request):
         )
     response = litellm.completion(**data)
     if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
-        print("reaches stream")
         return StreamingResponse(data_generator(response), media_type='text/event-stream')
     print_verbose(f"response: {response}")
     return response
\ No newline at end of file
diff --git a/litellm/utils.py b/litellm/utils.py
index 49bece66a..30d1870de 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -94,6 +94,14 @@ last_fetched_at_keys = None
 def _generate_id(): # private helper function
     return 'chatcmpl-' + str(uuid.uuid4())
 
+def map_finish_reason(finish_reason: str): # openai supports 5 stop sequences - 'stop', 'length', 'function_call', 'content_filter', 'null'
+    # anthropic mapping
+    print(f"receives finish reason: {finish_reason}")
+    if finish_reason == "stop_sequence":
+        return "stop"
+    return finish_reason
+
+
 class Message(OpenAIObject):
     def __init__(self, content="default", role="assistant", logprobs=None, **params):
         super(Message, self).__init__(**params)
@@ -114,7 +122,7 @@ class Choices(OpenAIObject):
     def __init__(self, finish_reason=None, index=0, message=None, **params):
         super(Choices, self).__init__(**params)
         if finish_reason:
-            self.finish_reason = finish_reason
+            self.finish_reason = map_finish_reason(finish_reason)
         else:
             self.finish_reason = "stop"
         self.index = index
@@ -3200,6 +3208,7 @@ class CustomStreamWrapper:
                 model_response.choices[0].delta = Delta(**completion_obj)
                 return model_response
             elif model_response.choices[0].finish_reason:
+                model_response.choices[0].finish_reason = map_finish_reason(model_response.choices[0].finish_reason) # ensure consistent output to openai
                 return model_response
         except StopIteration:
             raise StopIteration
diff --git a/poetry.lock b/poetry.lock
index dc313bd6f..4943d7f52 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -122,6 +122,17 @@ files = [
 [package.dependencies]
 frozenlist = ">=1.1.0"
 
+[[package]]
+name = "appdirs"
+version = "1.4.4"
+description = "A small Python module for determining appropriate platform-specific dirs, e.g. a \"user data dir\"."
+optional = false
+python-versions = "*"
+files = [
+    {file = "appdirs-1.4.4-py2.py3-none-any.whl", hash = "sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128"},
+    {file = "appdirs-1.4.4.tar.gz", hash = "sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41"},
+]
+
 [[package]]
 name = "async-timeout"
 version = "4.0.3"
@@ -1074,4 +1085,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.8"
-content-hash = "0fa234d1342838a6cc444dd996dbe404ca2cd6c872dcf560dbe420a2956aaecd"
+content-hash = "c8cae152cee4eda56560529476234bc5e91171c6207641af797e7bebf720a499"
diff --git a/pyproject.toml b/pyproject.toml
index e857833f6..e3f3c556d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -14,6 +14,7 @@ tiktoken = ">=0.4.0"
 importlib-metadata = ">=6.8.0"
 tokenizers = "*"
 click = "*"
+appdirs = "^1.4.4"
 
 [tool.poetry.scripts]
 litellm = 'litellm:run_server'