forked from phoenix/litellm-mirror
update values
This commit is contained in:
parent b8c8f01221
commit dc9f02267a
8 changed files with 84 additions and 10 deletions

.gitignore (vendored, 2 changes)

@@ -3,4 +3,4 @@
 litellm_uuid.txt
 __pycache__/
 bun.lockb
 .DS_Store
Binary file not shown.
Binary file not shown.

litellm/proxy/.env.template (new file, 19 lines)

@@ -0,0 +1,19 @@
+### KEYS ###
+# HUGGINGFACE_API_KEY="" # Uncomment to save your Hugging Face API key
+# OPENAI_API_KEY="" # Uncomment to save your OpenAI API Key
+# TOGETHER_API_KEY="" # Uncomment to save your TogetherAI API key
+# NLP_CLOUD_API_KEY="" # Uncomment to save your NLP Cloud API key
+# ANTHROPIC_API_KEY="" # Uncomment to save your Anthropic API key
+
+### MODEL CUSTOM PROMPT TEMPLATE ###
+# MODEL_SYSTEM_MESSAGE_START_TOKEN = "<|prompter|>" # This does not need to be a token, can be any string
+# MODEL_SYSTEM_MESSAGE_END_TOKEN = "<|endoftext|>" # This does not need to be a token, can be any string
+
+# MODEL_USER_MESSAGE_START_TOKEN = "<|prompter|>" # This does not need to be a token, can be any string
+# MODEL_USER_MESSAGE_END_TOKEN = "<|endoftext|>" # Applies only to user messages. Can be any string.
+
+# MODEL_ASSISTANT_MESSAGE_START_TOKEN = "<|prompter|>" # Applies only to assistant messages. Can be any string.
+# MODEL_ASSISTANT_MESSAGE_END_TOKEN = "<|endoftext|>" # Applies only to assistant messages. Can be any string.
+
+# MODEL_PRE_PROMPT = "You are a good bot" # Applied at the start of the prompt
+# MODEL_POST_PROMPT = "Now answer as best as you can" # Applied at the end of the prompt
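
For context, a minimal sketch of how these values can be picked up at runtime, assuming python-dotenv is installed and the relevant lines have been uncommented in a local .env (the variable names come from the template above; everything else is illustrative):

import os
from dotenv import load_dotenv

# Load variables from .env into the process environment (a no-op if the file is absent).
load_dotenv()

# Fall back to an empty string when a variable is still commented out.
system_start = os.getenv("MODEL_SYSTEM_MESSAGE_START_TOKEN", "")
pre_prompt = os.getenv("MODEL_PRE_PROMPT", "")
print(system_start, pre_prompt)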

litellm/proxy/.gitignore (vendored, new file, 1 line)

@@ -0,0 +1 @@
+.env

@@ -1,18 +1,36 @@
 import click
+import subprocess
+import os
 from dotenv import load_dotenv

 load_dotenv()

 @click.command()
 @click.option('--port', default=8000, help='Port to bind the server to.')
 @click.option('--api_base', default=None, help='API base URL.')
-@click.option('--model', required=True, help='The model name to pass to litellm expects')
+@click.option('--model', default=None, help='The model name to pass to litellm expects')
-@click.option('--deploy', required=True, help='Get a deployed proxy endpoint - api.litellm.ai')
+@click.option('--deploy', is_flag=True, help='Get a deployed proxy endpoint - api.litellm.ai')
 @click.option('--debug', is_flag=True, help='To debug the input')
 @click.option('--temperature', default=None, type=float, help='Set temperature for the model')
 @click.option('--max_tokens', default=None, help='Set max tokens for the model')
 @click.option('--telemetry', default=True, type=bool, help='Helps us know if people are using this feature. Turn this off by doing `--telemetry False`')
+@click.option('--config', is_flag=True, help='Create and open .env file from .env.template')
-def run_server(port, api_base, model, debug, temperature, max_tokens, telemetry):
+def run_server(port, api_base, model, deploy, debug, temperature, max_tokens, telemetry, config):
+    if config:
+        if os.path.exists('.env.template'):
+            if not os.path.exists('.env'):
+                with open('.env.template', 'r') as source:
+                    data = source.read()
+                with open('.env', 'w') as destination:
+                    destination.write(data)
+
+            click.echo('Opening .env file...')
+            subprocess.call(['open', '.env']) # replace `open` with `start` on Windows
+        else:
+            click.echo('No .env.template file found.')
+
-    from .proxy_server import app, initialize
+    # from .proxy_server import app, initialize
+    from proxy_server import app, initialize
     initialize(model, api_base, debug, temperature, max_tokens, telemetry)
     try:
         import uvicorn
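
The new --config branch boils down to "copy .env.template to .env once, then open it". A standalone sketch of the same behaviour (ensure_env_file is a hypothetical helper name, and shutil.copyfile stands in for the read/write pair in the diff; the open call is macOS-specific, as the inline comment notes):

import os
import shutil
import subprocess

def ensure_env_file():
    # Mirrors the --config branch above: create .env from .env.template if needed, then open it.
    if not os.path.exists('.env.template'):
        print('No .env.template file found.')
        return
    if not os.path.exists('.env'):
        shutil.copyfile('.env.template', '.env')
    subprocess.call(['open', '.env'])  # replace `open` with `start` on Windows

ensure_env_file()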

@@ -33,7 +33,6 @@ user_telemetry = False
 #### HELPER FUNCTIONS ####
 def print_verbose(print_statement):
     global user_debug
-    print(f"user_debug: {user_debug}")
     if user_debug:
         print(print_statement)

@@ -79,6 +78,26 @@ async def completion(request: Request):
     data["model"] = user_model
     if user_api_base:
         data["api_base"] = user_api_base
+    ## check for custom prompt template ##
+    litellm.register_prompt_template(
+        model=user_model,
+        roles={
+            "system": {
+                "pre_message": os.getenv("MODEL_SYSTEM_MESSAGE_START_TOKEN", ""),
+                "post_message": os.getenv("MODEL_SYSTEM_MESSAGE_END_TOKEN", ""),
+            },
+            "assistant": {
+                "pre_message": os.getenv("MODEL_ASSISTANT_MESSAGE_START_TOKEN", ""),
+                "post_message": os.getenv("MODEL_ASSISTANT_MESSAGE_END_TOKEN", "")
+            },
+            "user": {
+                "pre_message": os.getenv("MODEL_USER_MESSAGE_START_TOKEN", ""),
+                "post_message": os.getenv("MODEL_USER_MESSAGE_END_TOKEN", "")
+            }
+        },
+        initial_prompt_value=os.getenv("MODEL_PRE_PROMPT", ""),
+        final_prompt_value=os.getenv("MODEL_POST_PROMPT", "")
+    )
     response = litellm.text_completion(**data)
     if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
         return StreamingResponse(data_generator(response), media_type='text/event-stream')

@@ -91,7 +110,6 @@ async def chat_completion(request: Request):
     if (user_model is None):
         raise ValueError("Proxy model needs to be set")
     data["model"] = user_model

     # override with user settings
     if user_temperature:
         data["temperature"] = user_temperature

@@ -99,8 +117,26 @@ async def chat_completion(request: Request):
         data["max_tokens"] = user_max_tokens
     if user_api_base:
         data["api_base"] = user_api_base
+    ## check for custom prompt template ##
+    litellm.register_prompt_template(
+        model=user_model,
+        roles={
+            "system": {
+                "pre_message": os.getenv("MODEL_SYSTEM_MESSAGE_START_TOKEN", ""),
+                "post_message": os.getenv("MODEL_SYSTEM_MESSAGE_END_TOKEN", ""),
+            },
+            "assistant": {
+                "pre_message": os.getenv("MODEL_ASSISTANT_MESSAGE_START_TOKEN", ""),
+                "post_message": os.getenv("MODEL_ASSISTANT_MESSAGE_END_TOKEN", "")
+            },
+            "user": {
+                "pre_message": os.getenv("MODEL_USER_MESSAGE_START_TOKEN", ""),
+                "post_message": os.getenv("MODEL_USER_MESSAGE_END_TOKEN", "")
+            }
+        },
+        initial_prompt_value=os.getenv("MODEL_PRE_PROMPT", ""),
+        final_prompt_value=os.getenv("MODEL_POST_PROMPT", "")
+    )
     response = litellm.completion(**data)
     if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
         print("reaches stream")
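
Both register_prompt_template calls wire the .env.template variables into litellm so a model-specific prompt format can be applied per request. As a rough illustration of the intended effect only (sketch_render is a hypothetical helper, not litellm's actual rendering code), each role's pre/post strings wrap its messages and the pre/post prompt values bracket the whole prompt:

import os

def sketch_render(messages):
    # Hypothetical illustration of the template variables' effect on a chat prompt.
    roles = {
        "system": ("MODEL_SYSTEM_MESSAGE_START_TOKEN", "MODEL_SYSTEM_MESSAGE_END_TOKEN"),
        "user": ("MODEL_USER_MESSAGE_START_TOKEN", "MODEL_USER_MESSAGE_END_TOKEN"),
        "assistant": ("MODEL_ASSISTANT_MESSAGE_START_TOKEN", "MODEL_ASSISTANT_MESSAGE_END_TOKEN"),
    }
    prompt = os.getenv("MODEL_PRE_PROMPT", "")
    for message in messages:
        pre, post = roles[message["role"]]
        prompt += os.getenv(pre, "") + message["content"] + os.getenv(post, "")
    return prompt + os.getenv("MODEL_POST_PROMPT", "")

print(sketch_render([{"role": "user", "content": "Hello!"}]))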

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.798"
+version = "0.1.799"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"