fix(proxy/): remove cloned repo

2023-10-12 21:46:18 -07:00 · 2023-10-12 21:46:18 -07:00 · f2eb1b4658
commit f2eb1b4658
parent 8bb9be3c5a
8 changed files with 0 additions and 775 deletions
--- a/litellm/proxy/litellm-proxy/README.md
+++ b/litellm/proxy/litellm-proxy/README.md
@ -1,47 +0,0 @@
-# litellm-proxy
-
-A local, fast, and lightweight **OpenAI-compatible server** to call 100+ LLM APIs.
-
-## usage 
-
-```shell 
-$ pip install litellm
-```
-```shell
-$ litellm --model ollama/codellama 
-
-#INFO: Ollama running on http://0.0.0.0:8000
-```
-
-## replace openai base
-```python 
-import openai 
-
-openai.api_base = "http://0.0.0.0:8000"
-
-print(openai.ChatCompletion.create(model="test", messages=[{"role":"user", "content":"Hey!"}]))
-``` 
-
-[**See how to call Hugging Face, Bedrock, TogetherAI, Anthropic, etc.**](https://docs.litellm.ai/docs/proxy_server)
-
-## configure proxy
-
-To save API keys, change the model prompt, etc., you'll need to create a local instance of the proxy:
-```shell
-$ litellm --create_proxy
-```
-This will create a local project called `litellm-proxy` in your current directory, which contains:
-* **proxy_cli.py**: Runs the proxy
-* **proxy_server.py**: Contains the API calling logic
-    - `/chat/completions`: receives `openai.ChatCompletion.create` call.
-    - `/completions`: receives `openai.Completion.create` call.
-    - `/models`: receives `openai.Model.list()` call
-* **secrets.toml**: Stores your api keys, model configs, etc.
-
-Run it by doing:
-```shell
-$ cd litellm-proxy
-```
-```shell
-$ python proxy_cli.py --model ollama/llama # replace with your model name
-```
--- a/litellm/proxy/litellm-proxy/init.py
+++ b/litellm/proxy/litellm-proxy/init.py
@ -1 +0,0 @@
-from . import *
--- a/litellm/proxy/litellm-proxy/api_log.json
+++ b/litellm/proxy/litellm-proxy/api_log.json
@ -1,85 +0,0 @@
-{
-  "20231012182157625128": {
-    "pre_api_call": {
-      "model": "anthropic.claude-v2",
-      "messages": [
-        {
-          "role": "user",
-          "content": "what do you know?"
-        }
-      ],
-      "optional_params": {
-        "temperature": 0.1,
-        "stream": true
-      },
-      "litellm_params": {
-        "return_async": false,
-        "api_key": null,
-        "force_timeout": 600,
-        "logger_fn": null,
-        "verbose": false,
-        "custom_llm_provider": "bedrock",
-        "api_base": null,
-        "litellm_call_id": "902640b5-4a26-4629-932d-35d6cf4e1635",
-        "model_alias_map": {},
-        "completion_call_id": null,
-        "metadata": null,
-        "stream_response": {}
-      },
-      "input": "\n\nHuman: \n\nHuman: what do you know?\n\nAssistant: ",
-      "api_key": "",
-      "additional_args": {
-        "complete_input_dict": "{\"prompt\": \"\\n\\nHuman: \\n\\nHuman: what do you know?\\n\\nAssistant: \", \"temperature\": 0.1, \"max_tokens_to_sample\": 256}"
-      },
-      "log_event_type": "pre_api_call"
-    },
-    "post_api_call": {
-      "model": "anthropic.claude-v2",
-      "messages": [
-        {
-          "role": "user",
-          "content": "what do you know?"
-        }
-      ],
-      "optional_params": {
-        "temperature": 0.1,
-        "stream": true
-      },
-      "litellm_params": {
-        "return_async": false,
-        "api_key": null,
-        "force_timeout": 600,
-        "logger_fn": null,
-        "verbose": false,
-        "custom_llm_provider": "bedrock",
-        "api_base": null,
-        "litellm_call_id": "902640b5-4a26-4629-932d-35d6cf4e1635",
-        "model_alias_map": {},
-        "completion_call_id": null,
-        "metadata": null,
-        "stream_response": {}
-      },
-      "input": null,
-      "api_key": null,
-      "additional_args": {},
-      "log_event_type": "post_api_call",
-      "original_response": "<class 'generator'>",
-      "complete_streaming_response": {
-        "id": "chatcmpl-1757e5ea-71f2-44a2-9d8d-1ba8238a7c99",
-        "object": "chat.completion.chunk",
-        "created": 1697160117,
-        "model": "anthropic.claude-v2",
-        "choices": [
-          {
-            "index": 0,
-            "message": {
-              "role": "assistant",
-              "content": " I'm Claude, an AI assistant created by Anthropic. I don't actually have general knowledge about the world. I'm an AI conversational model trained by Anthropic to be helpful, harmless, and honest."
-            },
-            "finish_reason": "stop_sequence"
-          }
-        ]
-      }
-    }
-  }
-}
--- a/litellm/proxy/litellm-proxy/cost.log
+++ b/litellm/proxy/litellm-proxy/cost.log
--- a/litellm/proxy/litellm-proxy/costs.json
+++ b/litellm/proxy/litellm-proxy/costs.json
@ -1,8 +0,0 @@
-{
-  "Oct-12-2023": {
-    "claude-2": {
-      "cost": 0.02365918,
-      "num_requests": 1
-    }
-  }
-}
--- a/litellm/proxy/litellm-proxy/proxy_cli.py
+++ b/litellm/proxy/litellm-proxy/proxy_cli.py
@ -1,142 +0,0 @@
-import click
-import subprocess, traceback
-import os, sys
-import random
-from dotenv import load_dotenv
-
-load_dotenv()
-from importlib import resources
-import shutil
-telemetry = None
-
-def run_ollama_serve():
-    command = ['ollama', 'serve']
-    
-    with open(os.devnull, 'w') as devnull:
-        process = subprocess.Popen(command, stdout=devnull, stderr=devnull)
-
-def clone_subfolder(repo_url, subfolder, destination):
-  # Clone the full repo
-  repo_name = repo_url.split('/')[-1]  
-  repo_master = os.path.join(destination, "repo_master")
-  subprocess.run(['git', 'clone', repo_url, repo_master])
-
-  # Move into the subfolder 
-  subfolder_path = os.path.join(repo_master, subfolder)
-
-  # Copy subfolder to destination
-  for file_name in os.listdir(subfolder_path):
-    source = os.path.join(subfolder_path, file_name)
-    if os.path.isfile(source):
-        shutil.copy(source, destination)
-    else:
-        dest_path = os.path.join(destination, file_name)
-        shutil.copytree(source, dest_path)
-
-  # Remove cloned repo folder
-  subprocess.run(['rm', '-rf', os.path.join(destination, "repo_master")])
-  feature_telemetry(feature="create-proxy")
-
-def is_port_in_use(port):
-    import socket
-    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
-        return s.connect_ex(('localhost', port)) == 0
-
-@click.command()
-@click.option('--host', default='0.0.0.0', help='Host for the server to listen on.')
-@click.option('--port', default=8000, help='Port to bind the server to.')
-@click.option('--api_base', default=None, help='API base URL.')
-@click.option('--model', default=None, help='The model name to pass to litellm expects') 
-@click.option('--deploy', is_flag=True, type=bool, help='Get a deployed proxy endpoint - api.litellm.ai')
-@click.option('--debug', default=False, is_flag=True, type=bool, help='To debug the input') 
-@click.option('--temperature', default=None, type=float, help='Set temperature for the model') 
-@click.option('--max_tokens', default=None, type=int, help='Set max tokens for the model') 
-@click.option('--drop_params', is_flag=True, help='Drop any unmapped params') 
-@click.option('--create_proxy', is_flag=True, help='Creates a local OpenAI-compatible server template') 
-@click.option('--add_function_to_prompt', is_flag=True, help='If function passed but unsupported, pass it as prompt') 
-@click.option('--max_budget', default=None, type=float, help='Set max budget for API calls - works for hosted models like OpenAI, TogetherAI, Anthropic, etc.`') 
-@click.option('--telemetry', default=True, type=bool, help='Helps us know if people are using this feature. Turn this off by doing `--telemetry False`') 
-@click.option('--test', flag_value=True, help='proxy chat completions url to make a test request to')
-@click.option('--local', is_flag=True, default=False, help='for local debugging')
-@click.option('--cost', is_flag=True, default=False, help='for viewing cost logs')
-def run_server(host, port, api_base, model, deploy, debug, temperature, max_tokens, drop_params, create_proxy, add_function_to_prompt, max_budget, telemetry, test, local, cost):
-    global feature_telemetry
-    if local:
-        from proxy_server import app, initialize, deploy_proxy, print_cost_logs, usage_telemetry
-        debug = True
-    else:
-        try:
-            from .proxy_server import app, initialize, deploy_proxy, print_cost_logs, usage_telemetry
-        except ImportError as e: 
-            from proxy_server import app, initialize, deploy_proxy, print_cost_logs, usage_telemetry
-    feature_telemetry = usage_telemetry
-    if create_proxy == True: 
-        repo_url = 'https://github.com/BerriAI/litellm'
-        subfolder = 'litellm/proxy' 
-        destination = os.path.join(os.getcwd(), 'litellm-proxy')
-
-        clone_subfolder(repo_url, subfolder, destination)
-
-        return
-    if deploy == True:
-        print(f"\033[32mLiteLLM: Deploying your proxy to api.litellm.ai\033[0m\n")
-        print(f"\033[32mLiteLLM: Deploying proxy for model: {model}\033[0m\n")
-        url = deploy_proxy(model, api_base, debug, temperature, max_tokens, telemetry, deploy)
-        print(f"\033[32mLiteLLM: Deploy Successfull\033[0m\n")
-        print(f"\033[32mLiteLLM: Your deployed url: {url}\033[0m\n")
-
-        print(f"\033[32mLiteLLM: Test your URL using the following: \"litellm --test {url}\"\033[0m")
-        return
-    if model and "ollama" in model: 
-        run_ollama_serve()
-    if cost == True:
-        print_cost_logs()
-        return
-    if test != False:
-        click.echo('LiteLLM: Making a test ChatCompletions request to your proxy')
-        import openai
-        if test == True: # flag value set
-            api_base = f"http://{host}:{port}"
-        else: 
-            api_base = test
-        openai.api_base = api_base
-        openai.api_key = "temp-key"
-        print(openai.api_base)
-
-        response = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages = [
-            {
-                "role": "user",
-                "content": "this is a test request, acknowledge that you got it"
-            }
-        ])
-        click.echo(f'LiteLLM: response from proxy {response}')
-
-        click.echo(f'LiteLLM: response from proxy with streaming {response}')
-        response = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages = [
-            {
-                "role": "user",
-                "content": "this is a test request, acknowledge that you got it"
-            }
-        ],
-        stream=True,
-        )
-        for chunk in response:
-            click.echo(f'LiteLLM: streaming response from proxy {chunk}')
-        return
-    else:
-        initialize(model, api_base, debug, temperature, max_tokens, max_budget, telemetry, drop_params, add_function_to_prompt)
-        try:
-            import uvicorn
-        except:
-            raise ImportError("Uvicorn needs to be imported. Run - `pip install uvicorn`")
-        print(f"\033[32mLiteLLM: Deployed Proxy Locally\033[0m\n")
-        print(f"\033[32mLiteLLM: Test your local endpoint with: \"litellm --test\" [In a new terminal tab]\033[0m\n")
-        print(f"\033[32mLiteLLM: Deploy your proxy using the following: \"litellm --model claude-instant-1 --deploy\" Get an https://api.litellm.ai/chat/completions endpoint \033[0m\n")
-        
-        if port == 8000 and is_port_in_use(port):
-            port = random.randint(1024, 49152)
-        uvicorn.run(app, host=host, port=port)
-
-
-if __name__ == "__main__":
-    run_server()
--- a/litellm/proxy/litellm-proxy/proxy_server.py
+++ b/litellm/proxy/litellm-proxy/proxy_server.py
@ -1,461 +0,0 @@
-import sys, os, platform, time, copy
-import threading
-import shutil, random, traceback
-# sys.path.insert(
-#     0, os.path.abspath("../..")
-# )  # Adds the parent directory to the system path - for litellm local dev
-
-
-try:
-    import uvicorn
-    import fastapi
-    import tomli as tomllib
-    import appdirs
-except ImportError:
-    import subprocess
-    import sys
-
-    subprocess.check_call([sys.executable, "-m", "pip", "install", "uvicorn", "fastapi", "tomli", "appdirs"])
-    import uvicorn
-    import fastapi
-    import tomli as tomllib
-    import appdirs
-
-import random
-list_of_messages = [
-    "'The thing I wish you improved is...'",
-    "'A feature I really want is...'",
-    "'The worst thing about this product is...'",
-    "'This product would be better if...'",
-    "'I don't like how this works...'",
-    "'It would help me if you could add...'",
-    "'This feature doesn't meet my needs because...'",
-    "'I get frustrated when the product...'",  
-]
-
-def generate_feedback_box():
-  box_width = 60
-
-  # Select a random message
-  message = random.choice(list_of_messages)
-
-  print()
-  print('\033[1;37m' + '#' + '-'*box_width + '#\033[0m')
-  print('\033[1;37m' + '#' + ' '*box_width + '#\033[0m')
-  print('\033[1;37m' + '# {:^59} #\033[0m'.format(message))
-  print('\033[1;37m' + '# {:^59} #\033[0m'.format('https://github.com/BerriAI/litellm/issues/new'))
-  print('\033[1;37m' + '#' + ' '*box_width + '#\033[0m') 
-  print('\033[1;37m' + '#' + '-'*box_width + '#\033[0m')
-  print()
-  print(' Thank you for using LiteLLM! - Krrish & Ishaan')
-  print()
-  print()
-
-generate_feedback_box()
-
-
-print()
-print("\033[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new\033[0m")
-print()
-print("\033[1;34mDocs: https://docs.litellm.ai/docs/proxy_server\033[0m")
-print() 
-
-import litellm
-from fastapi import FastAPI, Request
-from fastapi.routing import APIRouter
-from fastapi.responses import StreamingResponse, FileResponse
-import json
-import logging
-
-app = FastAPI()
-router = APIRouter()
-
-user_api_base = None
-user_model = None
-user_debug = False
-user_max_tokens = None
-user_temperature = None
-user_telemetry = True
-user_config = None
-config_filename = "secrets.toml"
-config_dir = os.getcwd()
-user_config_path = os.path.join(config_dir, config_filename)
-log_file = 'api_log.json'
-#### HELPER FUNCTIONS ####
-def print_verbose(print_statement):
-    global user_debug 
-    if user_debug: 
-         print(print_statement)
-
-def usage_telemetry(feature: str): # helps us know if people are using this feature. Set `litellm --telemetry False` to your cli call to turn this off
-    print(f"user_telemtry: {user_telemetry}")
-    if user_telemetry: 
-        print(f"feature telemetry: {feature}")
-        data = {
-            "feature": feature # "local_proxy_server"
-        }
-        threading.Thread(target=litellm.utils.litellm_telemetry, args=(data,), daemon=True).start()
-
-def load_config():
-    try: 
-        global user_config, user_api_base, user_max_tokens, user_temperature, user_model
-        # As the .env file is typically much simpler in structure, we use load_dotenv here directly
-        with open(user_config_path, "rb") as f:
-            user_config = tomllib.load(f)
-
-        ## load keys
-        if "keys" in user_config:
-            for key in user_config["keys"]:
-                if key == "HUGGINGFACE_API_KEY":
-                    litellm.huggingface_key = user_config["keys"][key]
-                elif key == "OPENAI_API_KEY":
-                    litellm.openai_key = user_config["keys"][key]
-                elif key == "TOGETHERAI_API_KEY": 
-                    litellm.togetherai_api_key = user_config["keys"][key]
-                elif key == "NLP_CLOUD_API_KEY": 
-                    litellm.nlp_cloud_key = user_config["keys"][key]
-                elif key == "ANTHROPIC_API_KEY":
-                    litellm.anthropic_key = user_config["keys"][key]
-                elif key == "REPLICATE_API_KEY":
-                    litellm.replicate_key = user_config["keys"][key]
-                elif key == "AWS_ACCESS_KEY_ID":
-                    os.environ["AWS_ACCESS_KEY_ID"] = user_config["keys"][key]
-                elif key == "AWS_SECRET_ACCESS_KEY":
-                    os.environ["AWS_SECRET_ACCESS_KEY"] = user_config["keys"][key]
-
-        ## settings 
-        litellm.add_function_to_prompt = user_config["general"].get("add_function_to_prompt", True) # by default add function to prompt if unsupported by provider
-        litellm.drop_params = user_config["general"].get("drop_params", True) # by default drop params if unsupported by provider
-
-        ## load model config - to set this run `litellm --config`
-        model_config = None
-        if user_model in user_config["model"]: 
-            model_config = user_config["model"][user_model]
-        
-        print_verbose(f"user_config: {user_config}")
-        print_verbose(f"model_config: {model_config}")
-        if model_config is None:
-            return
-        user_model = model_config["model_name"] # raise an error if this isn't set when user runs either `litellm --model local_model` or  `litellm --model hosted_model`
-        print_verbose(f"user_model: {user_model}")
-
-
-        user_max_tokens = model_config.get("max_tokens", None)
-        user_temperature = model_config.get("temperature", None)
-        user_api_base = model_config.get("api_base", None)
-        
-        ## custom prompt template
-        if "prompt_template" in model_config:
-            model_prompt_template = model_config["prompt_template"]
-            if len(model_prompt_template.keys()) > 0: # if user has initialized this at all
-                litellm.register_prompt_template(
-                    model=user_model,
-                    initial_prompt_value=model_prompt_template.get("MODEL_PRE_PROMPT", ""),
-                    roles={
-                        "system": {
-                            "pre_message": model_prompt_template.get("MODEL_SYSTEM_MESSAGE_START_TOKEN", ""),
-                            "post_message": model_prompt_template.get("MODEL_SYSTEM_MESSAGE_END_TOKEN", ""), 
-                        }, 
-                        "user": {
-                            "pre_message": model_prompt_template.get("MODEL_USER_MESSAGE_START_TOKEN", ""),
-                            "post_message": model_prompt_template.get("MODEL_USER_MESSAGE_END_TOKEN", ""), 
-                        }, 
-                        "assistant": {
-                            "pre_message": model_prompt_template.get("MODEL_ASSISTANT_MESSAGE_START_TOKEN", ""),
-                            "post_message": model_prompt_template.get("MODEL_ASSISTANT_MESSAGE_END_TOKEN", ""), 
-                        }
-                    }, 
-                    final_prompt_value=model_prompt_template.get("MODEL_POST_PROMPT", ""),
-                )
-    except Exception as e:
-        traceback.print_exc()
-
-def initialize(model, api_base, debug, temperature, max_tokens, max_budget, telemetry, drop_params, add_function_to_prompt):
-    global user_model, user_api_base, user_debug, user_max_tokens, user_temperature, user_telemetry
-    user_model = model
-    user_debug = debug
-    
-    load_config()
-    user_api_base = api_base
-    user_max_tokens = max_tokens
-    user_temperature = temperature
-    user_telemetry = telemetry
-    usage_telemetry(feature="local_proxy_server")
-    if drop_params == True: 
-        litellm.drop_params = True
-    if add_function_to_prompt == True: 
-        litellm.add_function_to_prompt = True
-    if max_budget: 
-        litellm.max_budget = max_budget
-
-
-def deploy_proxy(model, api_base, debug, temperature, max_tokens, telemetry, deploy):
-    import requests
-    # Load .env file
-
-    # Prepare data for posting
-    data = {
-        "model": model,
-        "api_base": api_base,
-        "temperature": temperature,
-        "max_tokens": max_tokens,
-    }
-
-    # print(data)
-
-    # Make post request to the url
-    url = "https://litellm-api.onrender.com/deploy"
-    # url = "http://0.0.0.0:4000/deploy"
-
-    with open(".env", "w") as env_file:
-        for row in data:
-            env_file.write(f"{row.upper()}='{data[row]}'\n")
-        env_file.write("\n\n")
-        for key in os.environ:
-            value = os.environ[key]
-            env_file.write(f"{key}='{value}'\n")
-        # env_file.write(str(os.environ))
-
-    files = {"file": open(".env", "rb")}
-    # print(files)
-
-
-
-    response = requests.post(url, data=data, files=files)
-    # print(response)
-    # Check the status of the request
-    if response.status_code != 200:
-        return f"Request to url: {url} failed with status: {response.status_code}"
-
-    # Reading the response
-    response_data = response.json()
-    # print(response_data)
-    url = response_data["url"]
-    # # Do something with response_data
-
-    return url
-
-
-# for streaming
-def data_generator(response):
-    print_verbose("inside generator")
-    for chunk in response:
-        print_verbose(f"returned chunk: {chunk}")
-        yield f"data: {json.dumps(chunk)}\n\n"
-
-def track_cost_callback(
-    kwargs,                 # kwargs to completion
-    completion_response,    # response from completion
-    start_time, end_time    # start/end time
-):
-    # track cost like this 
-    # {
-    #     "Oct12": {
-    #         "gpt-4": 10,
-    #         "claude-2": 12.01, 
-    #     },
-    #     "Oct 15": {
-    #         "ollama/llama2": 0.0,
-    #         "gpt2": 1.2
-    #     }
-    # }
-    try:
-
-        # for streaming responses
-        if "complete_streaming_response" in kwargs:
-            # for tracking streaming cost we pass the "messages" and the output_text to litellm.completion_cost 
-            completion_response=kwargs["complete_streaming_response"]
-            input_text = kwargs["messages"]
-            output_text = completion_response["choices"][0]["message"]["content"]
-            response_cost = litellm.completion_cost(
-                model = kwargs["model"],
-                messages = input_text,
-                completion=output_text
-            )
-            model = kwargs['model']
-            print("streaming response_cost", response_cost)
-
-        # for non streaming responses
-        else:
-            # we pass the completion_response obj
-            if kwargs["stream"] != True:
-                response_cost = litellm.completion_cost(completion_response=completion_response)
-                print("regular response_cost", response_cost)
-                model = completion_response["model"]
-
-        # read/write from json for storing daily model costs 
-        cost_data = {}
-        try:
-            with open("costs.json") as f:
-                cost_data = json.load(f)
-        except FileNotFoundError:
-            cost_data = {} 
-        import datetime
-        date = datetime.datetime.now().strftime("%b-%d-%Y")
-        if date not in cost_data:
-            cost_data[date] = {}
-
-        if kwargs["model"] in cost_data[date]:
-            cost_data[date][kwargs["model"]]["cost"] += response_cost
-            cost_data[date][kwargs["model"]]["num_requests"] += 1
-        else:
-            cost_data[date][kwargs["model"]] = {
-                "cost": response_cost,
-                "num_requests": 1
-            }
-
-        with open("costs.json", "w") as f:
-            json.dump(cost_data, f, indent=2)
-
-    except:
-        pass
-
-def logger(
-    kwargs,                 # kwargs to completion
-    completion_response=None,    # response from completion
-    start_time=None, 
-    end_time=None    # start/end time
-):
-  log_event_type = kwargs['log_event_type']
-  print(f"REACHES LOGGER: {log_event_type}")
-  try: 
-    if log_event_type == 'pre_api_call':
-        inference_params = copy.deepcopy(kwargs)
-        timestamp = inference_params.pop('start_time')
-        dt_key = timestamp.strftime("%Y%m%d%H%M%S%f")[:23]
-        log_data = {
-            dt_key: {
-                'pre_api_call': inference_params
-            }
-        }
-        
-        try:
-            with open(log_file, 'r') as f:
-                existing_data = json.load(f)
-        except FileNotFoundError:
-            existing_data = {}
-            
-        existing_data.update(log_data)
-        
-        with open(log_file, 'w') as f:
-            json.dump(existing_data, f, indent=2)
-    elif log_event_type == 'post_api_call':
-        print(f"post api call kwargs: {kwargs}")
-        if "stream" not in kwargs["optional_params"] or kwargs["optional_params"]["stream"] is False or kwargs.get("complete_streaming_response", False):
-            inference_params = copy.deepcopy(kwargs)
-            timestamp = inference_params.pop('start_time')
-            dt_key = timestamp.strftime("%Y%m%d%H%M%S%f")[:23]
-            
-            with open(log_file, 'r') as f:
-                existing_data = json.load(f)
-            
-            existing_data[dt_key]['post_api_call'] = inference_params
-            
-            with open(log_file, 'w') as f:
-                json.dump(existing_data, f, indent=2)
-  except: 
-      traceback.print_exc()
-
-litellm.input_callback = [logger]
-litellm.success_callback = [logger]
-litellm.failure_callback = [logger]
-
-def litellm_completion(data, type): 
-    try: 
-        if user_model:
-            data["model"] = user_model
-        # override with user settings
-        if user_temperature: 
-            data["temperature"] = user_temperature
-        if user_max_tokens: 
-            data["max_tokens"] = user_max_tokens
-        if user_api_base: 
-            data["api_base"] = user_api_base
-        if type == "completion": 
-            response = litellm.text_completion(**data)
-        elif type == "chat_completion": 
-            response = litellm.completion(**data)
-        if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
-            return StreamingResponse(data_generator(response), media_type='text/event-stream')
-        print_verbose(f"response: {response}")
-        return response
-    except Exception as e: 
-        if "Invalid response object from API" in str(e): 
-            completion_call_details = {}
-            if user_model: 
-                completion_call_details["model"] = user_model
-            else: 
-                completion_call_details["model"] = data['model']
-            
-            if user_api_base: 
-                completion_call_details["api_base"] = user_api_base
-            else: 
-                completion_call_details["api_base"] = None
-            print(f"\033[1;31mLiteLLM.Exception: Invalid API Call. Call details: Model: \033[1;37m{completion_call_details['model']}\033[1;31m; LLM Provider: \033[1;37m{e.llm_provider}\033[1;31m; Custom API Base - \033[1;37m{completion_call_details['api_base']}\033[1;31m\033[0m")
-            if completion_call_details["api_base"] == "http://localhost:11434": 
-                print()
-                print("Trying to call ollama? Try `litellm --model ollama/llama2 --api_base http://localhost:11434`")
-                print()
-        else: 
-            print(f"\033[1;31mLiteLLM.Exception: {str(e)}\033[0m")
-        return {"message": "An error occurred"}, 500
-
-#### API ENDPOINTS ####
-@router.get("/models") # if project requires model list 
-def model_list():
-    if user_model != None:
-        return dict(
-            data=[{"id": user_model, "object": "model", "created": 1677610602, "owned_by": "openai"}],
-            object="list",
-        )
-    else:
-        all_models = litellm.model_list
-        return dict(
-            data = [{"id": model, "object": "model", "created": 1677610602, "owned_by": "openai"} for model in all_models],
-            object="list",
-        )
-
-@router.post("/completions")
-async def completion(request: Request):
-    data = await request.json()
-    return litellm_completion(data=data, type="completion")
-
-@router.post("/chat/completions")
-async def chat_completion(request: Request):
-    data = await request.json()
-    print(f"data passed in: {data}")
-    response = litellm_completion(data, type="chat_completion")
-    return response
-
-
-# V1 Endpoints - some apps expect a v1 endpoint - these call the regular function
-@router.post("/v1/completions")
-async def v1_completion(request: Request):
-    data = await request.json()
-    return litellm_completion(data=data, type="completion")
-
-@router.post("/v1/chat/completions")
-async def v1_chat_completion(request: Request):
-    data = await request.json()
-    print_verbose(f"data passed in: {data}")
-    response = litellm_completion(data, type="chat_completion")
-    return response
-
-def print_cost_logs():
-    with open('costs.json', 'r') as f:
-        # print this in green
-        print("\033[1;32m")
-        print(f.read())
-        print("\033[0m")
-    return
-
-@router.get("/ollama_logs")
-async def retrieve_server_log(request: Request):
-    filepath = os.path.expanduser('~/.ollama/logs/server.log')
-    return FileResponse(filepath)
-
-@router.get("/")
-async def home(request: Request):
-    return "LiteLLM: RUNNING"
-
-app.include_router(router)
--- a/litellm/proxy/litellm-proxy/secrets_template.toml
+++ b/litellm/proxy/litellm-proxy/secrets_template.toml
@ -1,31 +0,0 @@
-[keys]
-# HUGGINGFACE_API_KEY="" # Uncomment to save your Hugging Face API key
-# OPENAI_API_KEY="" # Uncomment to save your OpenAI API Key
-# TOGETHERAI_API_KEY="" # Uncomment to save your TogetherAI API key
-# NLP_CLOUD_API_KEY="" # Uncomment to save your NLP Cloud API key
-# ANTHROPIC_API_KEY="" # Uncomment to save your Anthropic API key
-# REPLICATE_API_KEY="" # Uncomment to save your Replicate API key
-# AWS_ACCESS_KEY_ID = "" # Uncomment to save your Bedrock/Sagemaker access keys
-# AWS_SECRET_ACCESS_KEY = "" # Uncomment to save your Bedrock/Sagemaker access keys
-
-[general]
-# add_function_to_prompt = true # e.g: Ollama doesn't support functions, so add it to the prompt instead
-# drop_params = true # drop any params not supported by the provider (e.g. Ollama)
-
-[model."ollama/llama2"] # run via `litellm --model ollama/llama2`
-# max_tokens = "" # set max tokens for the model 
-# temperature = "" # set temperature for the model 
-# api_base = "" # set a custom api base for the model
-
-[model."ollama/llama2".prompt_template] # [OPTIONAL] LiteLLM can automatically format the prompt - docs: https://docs.litellm.ai/docs/completion/prompt_formatting
-# MODEL_SYSTEM_MESSAGE_START_TOKEN = "[INST] <<SYS>>\n" # This does not need to be a token, can be any string
-# MODEL_SYSTEM_MESSAGE_END_TOKEN = "\n<</SYS>>\n [/INST]\n" # This does not need to be a token, can be any string
-
-# MODEL_USER_MESSAGE_START_TOKEN = "[INST] " # This does not need to be a token, can be any string
-# MODEL_USER_MESSAGE_END_TOKEN = " [/INST]\n" # Applies only to user messages. Can be any string.
-
-# MODEL_ASSISTANT_MESSAGE_START_TOKEN = "" # Applies only to assistant messages. Can be any string.
-# MODEL_ASSISTANT_MESSAGE_END_TOKEN = "\n" # Applies only to assistant messages. Can be any string.
-
-# MODEL_PRE_PROMPT = "You are a good bot" # Applied at the start of the prompt
-# MODEL_POST_PROMPT = "Now answer as best as you can" # Applied at the end of the prompt