diff --git a/.gitignore b/.gitignore
index 2a26b3530..c687cdff2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,4 +3,4 @@
 litellm_uuid.txt
 __pycache__/
 bun.lockb
-.DS_Store
+.DS_Store
\ No newline at end of file
diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc
index e58c70e65..61a2e4b71 100644
Binary files a/litellm/__pycache__/main.cpython-311.pyc and b/litellm/__pycache__/main.cpython-311.pyc differ
diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index aaa54ab89..da70c6bd2 100644
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/proxy/.env.template b/litellm/proxy/.env.template
new file mode 100644
index 000000000..8873aa289
--- /dev/null
+++ b/litellm/proxy/.env.template
@@ -0,0 +1,19 @@
+### KEYS ###
+# HUGGINGFACE_API_KEY="" # Uncomment to save your Hugging Face API key
+# OPENAI_API_KEY="" # Uncomment to save your OpenAI API key
+# TOGETHER_API_KEY="" # Uncomment to save your TogetherAI API key
+# NLP_CLOUD_API_KEY="" # Uncomment to save your NLP Cloud API key
+# ANTHROPIC_API_KEY="" # Uncomment to save your Anthropic API key
+
+### MODEL CUSTOM PROMPT TEMPLATE ###
+# MODEL_SYSTEM_MESSAGE_START_TOKEN = "<|prompter|>" # This does not need to be a token, can be any string
+# MODEL_SYSTEM_MESSAGE_END_TOKEN = "<|endoftext|>" # This does not need to be a token, can be any string
+
+# MODEL_USER_MESSAGE_START_TOKEN = "<|prompter|>" # Applies only to user messages. Can be any string.
+# MODEL_USER_MESSAGE_END_TOKEN = "<|endoftext|>" # Applies only to user messages. Can be any string.
+
+# MODEL_ASSISTANT_MESSAGE_START_TOKEN = "<|prompter|>" # Applies only to assistant messages. Can be any string.
+# MODEL_ASSISTANT_MESSAGE_END_TOKEN = "<|endoftext|>" # Applies only to assistant messages. Can be any string.
+
+# MODEL_PRE_PROMPT = "You are a good bot" # Applied at the start of the prompt
+# MODEL_POST_PROMPT = "Now answer as best as you can" # Applied at the end of the prompt
\ No newline at end of file
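Note on the template above: the proxy server (see the litellm/proxy/proxy_server.py changes further down) reads these variables and passes them to litellm.register_prompt_template. A rough sketch of how the start/end tokens and pre/post prompts are meant to wrap a conversation (the actual assembly happens inside litellm; render_prompt is a hypothetical helper used only for illustration):

import os

def render_prompt(messages):
    # Illustrative only: approximates how the START/END tokens and
    # PRE/POST prompts from .env are expected to wrap each chat message.
    prompt = os.getenv("MODEL_PRE_PROMPT", "")
    for message in messages:
        role = message["role"].upper()  # SYSTEM / USER / ASSISTANT
        pre = os.getenv(f"MODEL_{role}_MESSAGE_START_TOKEN", "")
        post = os.getenv(f"MODEL_{role}_MESSAGE_END_TOKEN", "")
        prompt += f"{pre}{message['content']}{post}"
    return prompt + os.getenv("MODEL_POST_PROMPT", "")

print(render_prompt([
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello!"},
]))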
diff --git a/litellm/proxy/.gitignore b/litellm/proxy/.gitignore
new file mode 100644
index 000000000..2eea525d8
--- /dev/null
+++ b/litellm/proxy/.gitignore
@@ -0,0 +1 @@
+.env
\ No newline at end of file
diff --git a/litellm/proxy/proxy_cli.py b/litellm/proxy/proxy_cli.py
index 7698fa63c..16f39404f 100644
--- a/litellm/proxy/proxy_cli.py
+++ b/litellm/proxy/proxy_cli.py
@@ -1,18 +1,36 @@
 import click
+import subprocess
+import os
 from dotenv import load_dotenv
+
 load_dotenv()
 
 @click.command()
 @click.option('--port', default=8000, help='Port to bind the server to.')
 @click.option('--api_base', default=None, help='API base URL.')
-@click.option('--model', required=True, help='The model name to pass to litellm expects')
-@click.option('--deploy', required=True, help='Get a deployed proxy endpoint - api.litellm.ai')
+@click.option('--model', default=None, help='The model name to pass to litellm expects')
+@click.option('--deploy', is_flag=True, help='Get a deployed proxy endpoint - api.litellm.ai')
 @click.option('--debug', is_flag=True, help='To debug the input')
 @click.option('--temperature', default=None, type=float, help='Set temperature for the model')
 @click.option('--max_tokens', default=None, help='Set max tokens for the model')
 @click.option('--telemetry', default=True, type=bool, help='Helps us know if people are using this feature. Turn this off by doing `--telemetry False`')
-def run_server(port, api_base, model, debug, temperature, max_tokens, telemetry):
-    from .proxy_server import app, initialize
+@click.option('--config', is_flag=True, help='Create and open .env file from .env.template')
+def run_server(port, api_base, model, deploy, debug, temperature, max_tokens, telemetry, config):
+    if config:
+        if os.path.exists('.env.template'):
+            if not os.path.exists('.env'):
+                with open('.env.template', 'r') as source:
+                    data = source.read()
+                with open('.env', 'w') as destination:
+                    destination.write(data)
+
+            click.echo('Opening .env file...')
+            subprocess.call(['open', '.env'])  # replace `open` with `start` on Windows
+        else:
+            click.echo('No .env.template file found.')
+
+    # from .proxy_server import app, initialize
+    from proxy_server import app, initialize
     initialize(model, api_base, debug, temperature, max_tokens, telemetry)
     try:
         import uvicorn
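The --config flow above copies .env.template to .env and shells out to macOS's `open`; the inline comment notes that Windows needs `start` instead. A minimal cross-platform sketch using only the standard library (open_env_file is a hypothetical helper, not part of this change):

import platform
import subprocess

def open_env_file(path: str = '.env') -> None:
    # Hypothetical cross-platform replacement for subprocess.call(['open', path]).
    system = platform.system()
    if system == 'Darwin':      # macOS
        subprocess.call(['open', path])
    elif system == 'Windows':   # 'start' is a cmd built-in, so route through cmd
        subprocess.call(['cmd', '/c', 'start', '', path])
    else:                       # most Linux desktops
        subprocess.call(['xdg-open', path])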
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index f6446fbf7..b8d246cea 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -33,7 +33,6 @@ user_telemetry = False
 #### HELPER FUNCTIONS ####
 def print_verbose(print_statement):
     global user_debug
-    print(f"user_debug: {user_debug}")
     if user_debug:
         print(print_statement)
 
@@ -79,6 +78,26 @@ async def completion(request: Request):
     data["model"] = user_model
     if user_api_base:
         data["api_base"] = user_api_base
+    ## check for custom prompt template ##
+    litellm.register_prompt_template(
+        model=user_model,
+        roles={
+            "system": {
+                "pre_message": os.getenv("MODEL_SYSTEM_MESSAGE_START_TOKEN", ""),
+                "post_message": os.getenv("MODEL_SYSTEM_MESSAGE_END_TOKEN", ""),
+            },
+            "assistant": {
+                "pre_message": os.getenv("MODEL_ASSISTANT_MESSAGE_START_TOKEN", ""),
+                "post_message": os.getenv("MODEL_ASSISTANT_MESSAGE_END_TOKEN", "")
+            },
+            "user": {
+                "pre_message": os.getenv("MODEL_USER_MESSAGE_START_TOKEN", ""),
+                "post_message": os.getenv("MODEL_USER_MESSAGE_END_TOKEN", "")
+            }
+        },
+        initial_prompt_value=os.getenv("MODEL_PRE_PROMPT", ""),
+        final_prompt_value=os.getenv("MODEL_POST_PROMPT", "")
+    )
     response = litellm.text_completion(**data)
     if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
         return StreamingResponse(data_generator(response), media_type='text/event-stream')
@@ -91,7 +110,6 @@ async def chat_completion(request: Request):
     if (user_model is None):
         raise ValueError("Proxy model needs to be set")
     data["model"] = user_model
-
     # override with user settings
     if user_temperature:
         data["temperature"] = user_temperature
@@ -99,8 +117,26 @@
         data["max_tokens"] = user_max_tokens
     if user_api_base:
         data["api_base"] = user_api_base
-    
-    
+    ## check for custom prompt template ##
+    litellm.register_prompt_template(
+        model=user_model,
+        roles={
+            "system": {
+                "pre_message": os.getenv("MODEL_SYSTEM_MESSAGE_START_TOKEN", ""),
+                "post_message": os.getenv("MODEL_SYSTEM_MESSAGE_END_TOKEN", ""),
+            },
+            "assistant": {
+                "pre_message": os.getenv("MODEL_ASSISTANT_MESSAGE_START_TOKEN", ""),
+                "post_message": os.getenv("MODEL_ASSISTANT_MESSAGE_END_TOKEN", "")
+            },
+            "user": {
+                "pre_message": os.getenv("MODEL_USER_MESSAGE_START_TOKEN", ""),
+                "post_message": os.getenv("MODEL_USER_MESSAGE_END_TOKEN", "")
+            }
+        },
+        initial_prompt_value=os.getenv("MODEL_PRE_PROMPT", ""),
+        final_prompt_value=os.getenv("MODEL_POST_PROMPT", "")
+    )
     response = litellm.completion(**data)
     if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
         print("reaches stream")
diff --git a/pyproject.toml b/pyproject.toml
index 7186e84d7..62daf0d5a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.798"
+version = "0.1.799"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"
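For context on the env-driven register_prompt_template calls above: with the sample values from .env.template filled in, each call is equivalent to a direct invocation like the sketch below (the model name is an illustrative placeholder, not something this change sets):

import litellm

# Hard-coded equivalent of the env-driven call in proxy_server.py;
# the model name and token strings are illustrative placeholders.
litellm.register_prompt_template(
    model="my-custom-model",
    roles={
        "system": {"pre_message": "<|prompter|>", "post_message": "<|endoftext|>"},
        "user": {"pre_message": "<|prompter|>", "post_message": "<|endoftext|>"},
        "assistant": {"pre_message": "<|prompter|>", "post_message": "<|endoftext|>"},
    },
    initial_prompt_value="You are a good bot",
    final_prompt_value="Now answer as best as you can",
)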