forked from phoenix/litellm-mirror
feat(proxy_server): adds create-proxy feature
parent 3da89a58ae
commit b28c055896
11 changed files with 246 additions and 124 deletions
Binary file not shown.

@@ -13,6 +13,24 @@ class CustomLogger:
    def __init__(self, callback_func):
        # Instance variables
        self.callback_func = callback_func

    def log_input_event(self, model, messages, kwargs, print_verbose):
        try:
            print_verbose(
                f"Custom Logger - Enters logging function for model {kwargs}"
            )
            kwargs["model"] = model
            kwargs["messages"] = messages
            kwargs["log_event_type"] = "pre_api_call"
            self.callback_func(
                kwargs,
            )
            print_verbose(
                f"Custom Logger - model call details: {kwargs}"
            )
        except:
            traceback.print_exc()
            print_verbose(f"Custom Logger Error - {traceback.format_exc()}")

    def log_event(self, kwargs, response_obj, start_time, end_time, print_verbose):
        # Method definition

@@ -20,6 +38,7 @@ class CustomLogger:
        print_verbose(
            f"Custom Logger - Enters logging function for model {kwargs}"
        )
        kwargs["log_event_type"] = "post_api_call"
        self.callback_func(
            kwargs, # kwargs to func
            response_obj,
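For context, a minimal sketch of how a callable callback gets wired into these events, following the registration pattern used by proxy_server.py later in this commit (litellm.input_callback / litellm.success_callback); the function name my_log_func is illustrative only:

import litellm

def my_log_func(kwargs, completion_response=None, start_time=None, end_time=None):
    # CustomLogger.log_input_event / log_event forward the call details here;
    # "log_event_type" is set to "pre_api_call" or "post_api_call" before the call
    print(kwargs.get("log_event_type"), kwargs.get("model"))

litellm.input_callback = [my_log_func]    # pre-call events
litellm.success_callback = [my_log_func]  # post-call events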
@@ -209,7 +209,7 @@ def init_bedrock_client(
def convert_messages_to_prompt(messages, provider):
    # handle anthropic prompts using anthropic constants
    if provider == "anthropic":
        prompt = ""
        prompt = f"{AnthropicConstants.HUMAN_PROMPT.value}"
        for message in messages:
            if "role" in message:
                if message["role"] == "user":
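Seeding the prompt with HUMAN_PROMPT before the per-message loop appears to be what produces the doubled "Human:" prefix captured in the api_log.json example added later in this commit; a hedged sketch of the expected result for that single-message example (constant values assumed from the anthropic SDK):

# hypothetical check, mirroring the "input" field recorded in litellm/proxy/api_log.json
messages = [{"role": "user", "content": "what do you know?"}]
expected = "\n\nHuman: \n\nHuman: what do you know?\n\nAssistant: "
# convert_messages_to_prompt(messages, "anthropic") would be expected to return `expected`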
@@ -7,7 +7,6 @@ def default_pt(messages):
    return " ".join(message["content"] for message in messages)

# Llama2 prompt template
llama_2_special_tokens = ["<s>", "</s>"]
def llama_2_chat_pt(messages):
    prompt = custom_prompt(
        role_dict={
85  litellm/proxy/api_log.json  Normal file

@@ -0,0 +1,85 @@
{
  "20231012182157625128": {
    "pre_api_call": {
      "model": "anthropic.claude-v2",
      "messages": [
        {
          "role": "user",
          "content": "what do you know?"
        }
      ],
      "optional_params": {
        "temperature": 0.1,
        "stream": true
      },
      "litellm_params": {
        "return_async": false,
        "api_key": null,
        "force_timeout": 600,
        "logger_fn": null,
        "verbose": false,
        "custom_llm_provider": "bedrock",
        "api_base": null,
        "litellm_call_id": "902640b5-4a26-4629-932d-35d6cf4e1635",
        "model_alias_map": {},
        "completion_call_id": null,
        "metadata": null,
        "stream_response": {}
      },
      "input": "\n\nHuman: \n\nHuman: what do you know?\n\nAssistant: ",
      "api_key": "",
      "additional_args": {
        "complete_input_dict": "{\"prompt\": \"\\n\\nHuman: \\n\\nHuman: what do you know?\\n\\nAssistant: \", \"temperature\": 0.1, \"max_tokens_to_sample\": 256}"
      },
      "log_event_type": "pre_api_call"
    },
    "post_api_call": {
      "model": "anthropic.claude-v2",
      "messages": [
        {
          "role": "user",
          "content": "what do you know?"
        }
      ],
      "optional_params": {
        "temperature": 0.1,
        "stream": true
      },
      "litellm_params": {
        "return_async": false,
        "api_key": null,
        "force_timeout": 600,
        "logger_fn": null,
        "verbose": false,
        "custom_llm_provider": "bedrock",
        "api_base": null,
        "litellm_call_id": "902640b5-4a26-4629-932d-35d6cf4e1635",
        "model_alias_map": {},
        "completion_call_id": null,
        "metadata": null,
        "stream_response": {}
      },
      "input": null,
      "api_key": null,
      "additional_args": {},
      "log_event_type": "post_api_call",
      "original_response": "<class 'generator'>",
      "complete_streaming_response": {
        "id": "chatcmpl-1757e5ea-71f2-44a2-9d8d-1ba8238a7c99",
        "object": "chat.completion.chunk",
        "created": 1697160117,
        "model": "anthropic.claude-v2",
        "choices": [
          {
            "index": 0,
            "message": {
              "role": "assistant",
              "content": " I'm Claude, an AI assistant created by Anthropic. I don't actually have general knowledge about the world. I'm an AI conversational model trained by Anthropic to be helpful, harmless, and honest."
            },
            "finish_reason": "stop_sequence"
          }
        ]
      }
    }
  }
}
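A small sketch of how this log could be read back, assuming the nested layout shown above (timestamp key mapping to pre_api_call / post_api_call entries):

import json

# read the proxy's request log (path as created in this commit)
with open("litellm/proxy/api_log.json") as f:
    log = json.load(f)

for dt_key, entry in log.items():
    model = entry["pre_api_call"]["model"]
    post = entry.get("post_api_call", {})
    reply = post.get("complete_streaming_response", {})
    choices = reply.get("choices", [])
    answer = choices[0]["message"]["content"] if choices else None
    print(dt_key, model, answer)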
@@ -1,11 +0,0 @@
2023-10-11 15:02:23 - Model gpt-4 Cost: $0.00063000
2023-10-11 15:02:57 - Model gpt-4 Cost: $0.00093000
2023-10-11 15:09:10 - Model gpt-4 Cost: $0.00135000
2023-10-11 15:09:50 - Model gpt-4 Cost: $0.01626000
2023-10-11 15:12:57 - Model gpt-4 Cost: $0.01974000
2023-10-11 15:13:35 - Model gpt-4 Cost: $0.02415000
2023-10-11 15:14:04 - Model gpt-4 Cost: $0.03291000
2023-10-11 15:18:16 - Model gpt-4 Cost: $0.03669000
2023-10-11 15:19:12 - Model gpt-4 Cost: $0.04806000
2023-10-11 21:11:06 - Model claude-2 Cost: $0.00041534
2023-10-11 21:15:34 - Model claude-2 Cost: $0.00054606
@@ -1,6 +1,6 @@
import click
import subprocess, traceback
import os, appdirs
import os, sys
import random
from dotenv import load_dotenv

@@ -8,42 +8,33 @@ load_dotenv()
from importlib import resources
import shutil

config_filename = "litellm.secrets.toml"
pkg_config_filename = "template.secrets.toml"
# Using appdirs to determine user-specific config path
config_dir = appdirs.user_config_dir("litellm")
user_config_path = os.path.join(config_dir, config_filename)

def run_ollama_serve():
    command = ['ollama', 'serve']

    with open(os.devnull, 'w') as devnull:
        process = subprocess.Popen(command, stdout=devnull, stderr=devnull)

def open_config():
    # Create the .env file if it doesn't exist
    if not os.path.exists(user_config_path):
        # If user's env doesn't exist, copy the default env from the package
        here = os.path.abspath(os.path.dirname(__file__))
        parent_dir = os.path.dirname(here)
        default_env_path = os.path.join(parent_dir, pkg_config_filename)
        # Ensure the user-specific directory exists
        os.makedirs(config_dir, exist_ok=True)
        # Copying the file using shutil.copy
        try:
            shutil.copy(default_env_path, user_config_path)
        except Exception as e:
            print(f"Failed to copy .template.secrets.toml: {e}")
def clone_subfolder(repo_url, subfolder, destination):

    # Open the .env file in the default editor
    try:
        if os.name == 'nt': # For Windows
            os.startfile(user_config_path)
        elif os.name == 'posix': # For MacOS, Linux, and anything using Bash
            subprocess.call(('open', '-t', user_config_path))
    except:
        pass
    print(f"LiteLLM: Proxy Server Config - {user_config_path}")
    # Clone the full repo
    repo_name = repo_url.split('/')[-1]
    repo_master = os.path.join(destination, "repo_master")
    subprocess.run(['git', 'clone', repo_url, repo_master])

    # Move into the subfolder
    subfolder_path = os.path.join(repo_master, subfolder)

    # Copy subfolder to destination
    for file_name in os.listdir(subfolder_path):
        source = os.path.join(subfolder_path, file_name)
        if os.path.isfile(source):
            shutil.copy(source, destination)
        else:
            dest_path = os.path.join(destination, file_name)
            shutil.copytree(source, dest_path)

    # Remove cloned repo folder
    subprocess.run(['rm', '-rf', os.path.join(destination, "repo_master")])

def is_port_in_use(port):
    import socket
@@ -60,23 +51,31 @@ def is_port_in_use(port):
@click.option('--temperature', default=None, type=float, help='Set temperature for the model')
@click.option('--max_tokens', default=None, type=int, help='Set max tokens for the model')
@click.option('--drop_params', is_flag=True, help='Drop any unmapped params')
@click.option('--create_proxy', is_flag=True, help='Creates a local OpenAI-compatible server template')
@click.option('--add_function_to_prompt', is_flag=True, help='If function passed but unsupported, pass it as prompt')
@click.option('--max_budget', default=None, type=float, help='Set max budget for API calls - works for hosted models like OpenAI, TogetherAI, Anthropic, etc.`')
@click.option('--telemetry', default=True, type=bool, help='Helps us know if people are using this feature. Turn this off by doing `--telemetry False`')
@click.option('--config', is_flag=True, help='Create and open .env file from .env.template')
@click.option('--test', flag_value=True, help='proxy chat completions url to make a test request to')
@click.option('--local', is_flag=True, default=False, help='for local debugging')
@click.option('--cost', is_flag=True, default=False, help='for viewing cost logs')
def run_server(host, port, api_base, model, deploy, debug, temperature, max_tokens, drop_params, add_function_to_prompt, max_budget, telemetry, config, test, local, cost):
    if config:
        open_config()
        return
def run_server(host, port, api_base, model, deploy, debug, temperature, max_tokens, drop_params, create_proxy, add_function_to_prompt, max_budget, telemetry, test, local, cost):
    if local:
        from proxy_server import app, initialize, deploy_proxy, print_cost_logs
        debug = True
    else:
        from .proxy_server import app, initialize, deploy_proxy, print_cost_logs
    try:
        from .proxy_server import app, initialize, deploy_proxy, print_cost_logs
    except ImportError as e:
        from proxy_server import app, initialize, deploy_proxy, print_cost_logs

    if create_proxy == True:
        repo_url = 'https://github.com/BerriAI/litellm'
        subfolder = 'litellm/proxy'
        destination = os.path.join(os.getcwd(), 'litellm-proxy')

        clone_subfolder(repo_url, subfolder, destination)

        return
    if deploy == True:
        print(f"\033[32mLiteLLM: Deploying your proxy to api.litellm.ai\033[0m\n")
        print(f"\033[32mLiteLLM: Deploying proxy for model: {model}\033[0m\n")
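In other words, passing --create_proxy is roughly equivalent to the sketch below (values taken from run_server above; git and network access are assumed, and clone_subfolder is the helper defined earlier in this diff):

import os

# what `--create_proxy` boils down to in this commit
repo_url = 'https://github.com/BerriAI/litellm'
subfolder = 'litellm/proxy'
destination = os.path.join(os.getcwd(), 'litellm-proxy')
clone_subfolder(repo_url, subfolder, destination)  # clones the repo, copies litellm/proxy, removes the clone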
@@ -1,9 +1,10 @@
import sys, os, platform
import sys, os, platform, time, copy
import threading
import shutil, random, traceback
sys.path.insert(
    0, os.path.abspath("../..")
) # Adds the parent directory to the system path
# sys.path.insert(
#     0, os.path.abspath("../..")
# ) # Adds the parent directory to the system path - for litellm local dev


try:
    import uvicorn
@@ -76,12 +77,10 @@ user_max_tokens = None
user_temperature = None
user_telemetry = False
user_config = None
config_filename = "litellm.secrets.toml"
pkg_config_filename = "template.secrets.toml"
# Using appdirs to determine user-specific config path
config_dir = appdirs.user_config_dir("litellm")
config_filename = "secrets.toml"
config_dir = os.getcwd()
user_config_path = os.path.join(config_dir, config_filename)

log_file = 'api_log.json'
#### HELPER FUNCTIONS ####
def print_verbose(print_statement):
    global user_debug
@@ -98,15 +97,6 @@ def usage_telemetry(): # helps us know if people are using this feature. Set `li
def load_config():
    try:
        global user_config, user_api_base, user_max_tokens, user_temperature, user_model
        if not os.path.exists(user_config_path):
            # If user's config doesn't exist, copy the default config from the package
            here = os.path.abspath(os.path.dirname(__file__))
            parent_dir = os.path.dirname(here)
            default_config_path = os.path.join(parent_dir, pkg_config_filename)
            # Ensure the user-specific directory exists
            os.makedirs(config_dir, exist_ok=True)
            # Copying the file using shutil.copy
            shutil.copy(default_config_path, user_config_path)
        # As the .env file is typically much simpler in structure, we use load_dotenv here directly
        with open(user_config_path, "rb") as f:
            user_config = tomllib.load(f)
@@ -133,11 +123,8 @@ def load_config():

        ## load model config - to set this run `litellm --config`
        model_config = None
        if user_model == "local":
            model_config = user_config["local_model"]
        elif user_model == "hosted":
            model_config = user_config["hosted_model"]
            litellm.max_budget = model_config.get("max_budget", None) # check if user set a budget for hosted model - e.g. gpt-4
        if user_model in user_config["model"]:
            model_config = user_config["model"][user_model]

        print_verbose(f"user_config: {user_config}")
        print_verbose(f"model_config: {model_config}")
@@ -317,7 +304,55 @@ def track_cost_callback(
    except:
        pass

litellm.success_callback = [track_cost_callback]
def logger(
    kwargs, # kwargs to completion
    completion_response=None, # response from completion
    start_time=None,
    end_time=None # start/end time
):
    log_event_type = kwargs['log_event_type']
    print(f"REACHES LOGGER: {log_event_type}")
    try:
        if log_event_type == 'pre_api_call':
            inference_params = copy.deepcopy(kwargs)
            timestamp = inference_params.pop('start_time')
            dt_key = timestamp.strftime("%Y%m%d%H%M%S%f")[:23]
            log_data = {
                dt_key: {
                    'pre_api_call': inference_params
                }
            }

            try:
                with open(log_file, 'r') as f:
                    existing_data = json.load(f)
            except FileNotFoundError:
                existing_data = {}

            existing_data.update(log_data)

            with open(log_file, 'w') as f:
                json.dump(existing_data, f, indent=2)
        elif log_event_type == 'post_api_call':
            print(f"post api call kwargs: {kwargs}")
            if "stream" not in kwargs["optional_params"] or kwargs["optional_params"]["stream"] is False or kwargs.get("complete_streaming_response", False):
                inference_params = copy.deepcopy(kwargs)
                timestamp = inference_params.pop('start_time')
                dt_key = timestamp.strftime("%Y%m%d%H%M%S%f")[:23]

                with open(log_file, 'r') as f:
                    existing_data = json.load(f)

                existing_data[dt_key]['post_api_call'] = inference_params

                with open(log_file, 'w') as f:
                    json.dump(existing_data, f, indent=2)
    except:
        traceback.print_exc()

litellm.input_callback = [logger]
litellm.success_callback = [logger]
litellm.failure_callback = [logger]

def litellm_completion(data, type):
    try:
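As a side note, the dt_key built above is what produces the timestamp keys seen in api_log.json; a quick sketch of that mapping (the datetime value is chosen to match the example log entry in this commit):

from datetime import datetime

# start_time popped from kwargs is a datetime; the [:23] slice drops nothing here
# because the formatted string is only 20 characters long
ts = datetime(2023, 10, 12, 18, 21, 57, 625128)
dt_key = ts.strftime("%Y%m%d%H%M%S%f")[:23]
print(dt_key)  # 20231012182157625128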
29  litellm/proxy/secrets.toml  Normal file

@@ -0,0 +1,29 @@
[keys]
# HUGGINGFACE_API_KEY="" # Uncomment to save your Hugging Face API key
# OPENAI_API_KEY="" # Uncomment to save your OpenAI API Key
# TOGETHERAI_API_KEY="" # Uncomment to save your TogetherAI API key
# NLP_CLOUD_API_KEY="" # Uncomment to save your NLP Cloud API key
# ANTHROPIC_API_KEY="" # Uncomment to save your Anthropic API key
# REPLICATE_API_KEY="" # Uncomment to save your Replicate API key

[general]
# add_function_to_prompt = True # e.g: Ollama doesn't support functions, so add it to the prompt instead
# drop_params = True # drop any params not supported by the provider (e.g. Ollama)

[model."ollama/llama2"] # run via `litellm --model ollama/llama2`
# max_tokens = "" # set max tokens for the model
# temperature = "" # set temperature for the model
# api_base = "" # set a custom api base for the model

[model."ollama/llama2".prompt_template] # [OPTIONAL] LiteLLM can automatically formats the prompt - docs: https://docs.litellm.ai/docs/completion/prompt_formatting
# MODEL_SYSTEM_MESSAGE_START_TOKEN = "[INST] <<SYS>>\n" # This does not need to be a token, can be any string
# MODEL_SYSTEM_MESSAGE_END_TOKEN = "\n<</SYS>>\n [/INST]\n" # This does not need to be a token, can be any string

# MODEL_USER_MESSAGE_START_TOKEN = "[INST] " # This does not need to be a token, can be any string
# MODEL_USER_MESSAGE_END_TOKEN = " [/INST]\n" # Applies only to user messages. Can be any string.

# MODEL_ASSISTANT_MESSAGE_START_TOKEN = "" # Applies only to assistant messages. Can be any string.
# MODEL_ASSISTANT_MESSAGE_END_TOKEN = "\n" # Applies only to system messages. Can be any string.

# MODEL_PRE_PROMPT = "You are a good bot" # Applied at the start of the prompt
# MODEL_POST_PROMPT = "Now answer as best as you can" # Applied at the end of the prompt
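A small sketch of how load_config() in proxy_server.py (see the hunk above) would resolve this per-model table when the proxy is started with --model ollama/llama2; the standalone file path here is illustrative:

import tomllib  # Python 3.11+; older interpreters would need a third-party TOML parser

with open("secrets.toml", "rb") as f:
    user_config = tomllib.load(f)

user_model = "ollama/llama2"
if user_model in user_config["model"]:
    model_config = user_config["model"][user_model]
    # e.g. model_config.get("max_tokens"), model_config.get("prompt_template")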
@@ -1,50 +0,0 @@
[keys]
# HUGGINGFACE_API_KEY="" # Uncomment to save your Hugging Face API key
# OPENAI_API_KEY="" # Uncomment to save your OpenAI API Key
# TOGETHERAI_API_KEY="" # Uncomment to save your TogetherAI API key
# NLP_CLOUD_API_KEY="" # Uncomment to save your NLP Cloud API key
# ANTHROPIC_API_KEY="" # Uncomment to save your Anthropic API key
# REPLICATE_API_KEY="" # Uncomment to save your Replicate API key

[general]
# add_function_to_prompt = True # e.g: Ollama doesn't support functions, so add it to the prompt instead
# drop_params = True # drop any params not supported by the provider (e.g. Ollama)

[local_model] # run via `litellm --model local`
# model_name = "ollama/codellama" # Uncomment to set a local model
# max_tokens = "" # set max tokens for the model
# temperature = "" # set temperature for the model
# api_base = "" # set a custom api base for the model

[local_model.prompt_template] # Set a custom prompt template for your local model - docs: https://docs.litellm.ai/docs/completion/prompt_formatting#format-prompt-yourself
# MODEL_SYSTEM_MESSAGE_START_TOKEN = "<|prompter|>" # This does not need to be a token, can be any string
# MODEL_SYSTEM_MESSAGE_END_TOKEN = "<|endoftext|>" # This does not need to be a token, can be any string

# MODEL_USER_MESSAGE_START_TOKEN = "<|prompter|>" # This does not need to be a token, can be any string
# MODEL_USER_MESSAGE_END_TOKEN = "<|endoftext|>" # Applies only to user messages. Can be any string.

# MODEL_ASSISTANT_MESSAGE_START_TOKEN = "<|prompter|>" # Applies only to assistant messages. Can be any string.
# MODEL_ASSISTANT_MESSAGE_END_TOKEN = "<|endoftext|>" # Applies only to system messages. Can be any string.

# MODEL_PRE_PROMPT = "You are a good bot" # Applied at the start of the prompt
# MODEL_POST_PROMPT = "Now answer as best as you can" # Applied at the end of the prompt

[hosted_model] # run via `litellm --model hosted`
# model_name = "gpt-4"
# max_tokens = "" # set max tokens for the model
# temperature = "" # set temperature for the model
# api_base = "" # set a custom api base for the model
# max_budget = 100 # sets a max budget of $100 for your hosted model

[hosted_model.prompt_template] # Set a custom prompt template for your hosted model - docs: https://docs.litellm.ai/docs/completion/prompt_formatting#format-prompt-yourself
# MODEL_SYSTEM_MESSAGE_START_TOKEN = "<|prompter|>" # This does not need to be a token, can be any string
# MODEL_SYSTEM_MESSAGE_END_TOKEN = "<|endoftext|>" # This does not need to be a token, can be any string

# MODEL_USER_MESSAGE_START_TOKEN = "<|prompter|>" # This does not need to be a token, can be any string
# MODEL_USER_MESSAGE_END_TOKEN = "<|endoftext|>" # Applies only to user messages. Can be any string.

# MODEL_ASSISTANT_MESSAGE_START_TOKEN = "<|prompter|>" # Applies only to assistant messages. Can be any string.
# MODEL_ASSISTANT_MESSAGE_END_TOKEN = "<|endoftext|>" # Applies only to system messages. Can be any string.

# MODEL_PRE_PROMPT = "You are a good bot" # Applied at the start of the prompt
# MODEL_POST_PROMPT = "Now answer as best as you can" # Applied at the end of the prompt
@@ -53,7 +53,6 @@ from .exceptions import (
)
from typing import cast, List, Dict, Union, Optional
from .caching import Cache
from .llms.prompt_templates.factory import llama_2_special_tokens

####### ENVIRONMENT VARIABLES ####################
dotenv.load_dotenv() # Loading env variables using dotenv
@@ -249,6 +248,7 @@ class Logging:
            "messages": self.messages,
            "optional_params": self.optional_params,
            "litellm_params": self.litellm_params,
            "start_time": self.start_time
        }

    def pre_call(self, input, api_key, model=None, additional_args={}):
@@ -323,7 +323,15 @@ class Logging:
                        message=f"Model Call Details pre-call: {self.model_call_details}",
                        level="info",
                    )
                elif callable(callback): # custom logger functions
                    customLogger.log_input_event(
                        model=self.model,
                        messages=self.messages,
                        kwargs=self.model_call_details,
                        print_verbose=print_verbose,
                    )
            except Exception as e:
                traceback.print_exc()
                print_verbose(
                    f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while input logging with integrations {traceback.format_exc()}"
                )
@@ -416,6 +424,7 @@ class Logging:

        ## BUILD COMPLETE STREAMED RESPONSE
        if self.stream:
            print(f"stream result: {result}")
            if result.choices[0].finish_reason: # if it's the last chunk
                self.streaming_chunks.append(result)
                complete_streaming_response = litellm.stream_chunk_builder(self.streaming_chunks)
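For reference, a hedged sketch of the same chunk-assembly idea from caller code, assuming litellm.stream_chunk_builder accepts a list of collected stream chunks as used above (model access and credentials are assumed and not part of this diff):

import litellm

chunks = []
for chunk in litellm.completion(
    model="anthropic.claude-v2",
    messages=[{"role": "user", "content": "what do you know?"}],
    stream=True,
):
    chunks.append(chunk)

# rebuild one complete response object from the streamed chunks
full_response = litellm.stream_chunk_builder(chunks)
print(full_response)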
@@ -573,6 +582,14 @@ class Logging:
                        capture_exception(exception)
                    else:
                        print_verbose(f"capture exception not initialized: {capture_exception}")
                elif callable(callback): # custom logger functions
                    customLogger.log_event(
                        kwargs=self.model_call_details,
                        response_obj=result,
                        start_time=start_time,
                        end_time=end_time,
                        print_verbose=print_verbose,
                    )
            except Exception as e:
                print_verbose(
                    f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while failure logging with integrations {traceback.format_exc()}"