From f2eb1b4658d78e2bfd42726c14a3fd071008d309 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Thu, 12 Oct 2023 21:46:18 -0700 Subject: [PATCH] fix(proxy/): remove cloned repo --- litellm/proxy/litellm-proxy/README.md | 47 -- litellm/proxy/litellm-proxy/__init__.py | 1 - litellm/proxy/litellm-proxy/api_log.json | 85 ---- litellm/proxy/litellm-proxy/cost.log | 0 litellm/proxy/litellm-proxy/costs.json | 8 - litellm/proxy/litellm-proxy/proxy_cli.py | 142 ------ litellm/proxy/litellm-proxy/proxy_server.py | 461 ------------------ .../proxy/litellm-proxy/secrets_template.toml | 31 -- 8 files changed, 775 deletions(-) delete mode 100644 litellm/proxy/litellm-proxy/README.md delete mode 100644 litellm/proxy/litellm-proxy/__init__.py delete mode 100644 litellm/proxy/litellm-proxy/api_log.json delete mode 100644 litellm/proxy/litellm-proxy/cost.log delete mode 100644 litellm/proxy/litellm-proxy/costs.json delete mode 100644 litellm/proxy/litellm-proxy/proxy_cli.py delete mode 100644 litellm/proxy/litellm-proxy/proxy_server.py delete mode 100644 litellm/proxy/litellm-proxy/secrets_template.toml diff --git a/litellm/proxy/litellm-proxy/README.md b/litellm/proxy/litellm-proxy/README.md deleted file mode 100644 index 413c55b2c8..0000000000 --- a/litellm/proxy/litellm-proxy/README.md +++ /dev/null @@ -1,47 +0,0 @@ -# litellm-proxy - -A local, fast, and lightweight **OpenAI-compatible server** to call 100+ LLM APIs. - -## usage - -```shell -$ pip install litellm -``` -```shell -$ litellm --model ollama/codellama - -#INFO: Ollama running on http://0.0.0.0:8000 -``` - -## replace openai base -```python -import openai - -openai.api_base = "http://0.0.0.0:8000" - -print(openai.ChatCompletion.create(model="test", messages=[{"role":"user", "content":"Hey!"}])) -``` - -[**See how to call Huggingface,Bedrock,TogetherAI,Anthropic, etc.**](https://docs.litellm.ai/docs/proxy_server) - -## configure proxy - -To save API Keys, change model prompt, etc. you'll need to create a local instance of it: -```shell -$ litellm --create-proxy -``` -This will create a local project called `litellm-proxy` in your current directory, that has: -* **proxy_cli.py**: Runs the proxy -* **proxy_server.py**: Contains the API calling logic - - `/chat/completions`: receives `openai.ChatCompletion.create` call. - - `/completions`: receives `openai.Completion.create` call. - - `/models`: receives `openai.Model.list()` call -* **secrets.toml**: Stores your api keys, model configs, etc. - -Run it by doing: -```shell -$ cd litellm-proxy -``` -```shell -$ python proxy_cli.py --model ollama/llama # replace with your model name -``` \ No newline at end of file diff --git a/litellm/proxy/litellm-proxy/__init__.py b/litellm/proxy/litellm-proxy/__init__.py deleted file mode 100644 index b9742821a6..0000000000 --- a/litellm/proxy/litellm-proxy/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from . import * \ No newline at end of file diff --git a/litellm/proxy/litellm-proxy/api_log.json b/litellm/proxy/litellm-proxy/api_log.json deleted file mode 100644 index 810c5b826b..0000000000 --- a/litellm/proxy/litellm-proxy/api_log.json +++ /dev/null @@ -1,85 +0,0 @@ -{ - "20231012182157625128": { - "pre_api_call": { - "model": "anthropic.claude-v2", - "messages": [ - { - "role": "user", - "content": "what do you know?" 
- } - ], - "optional_params": { - "temperature": 0.1, - "stream": true - }, - "litellm_params": { - "return_async": false, - "api_key": null, - "force_timeout": 600, - "logger_fn": null, - "verbose": false, - "custom_llm_provider": "bedrock", - "api_base": null, - "litellm_call_id": "902640b5-4a26-4629-932d-35d6cf4e1635", - "model_alias_map": {}, - "completion_call_id": null, - "metadata": null, - "stream_response": {} - }, - "input": "\n\nHuman: \n\nHuman: what do you know?\n\nAssistant: ", - "api_key": "", - "additional_args": { - "complete_input_dict": "{\"prompt\": \"\\n\\nHuman: \\n\\nHuman: what do you know?\\n\\nAssistant: \", \"temperature\": 0.1, \"max_tokens_to_sample\": 256}" - }, - "log_event_type": "pre_api_call" - }, - "post_api_call": { - "model": "anthropic.claude-v2", - "messages": [ - { - "role": "user", - "content": "what do you know?" - } - ], - "optional_params": { - "temperature": 0.1, - "stream": true - }, - "litellm_params": { - "return_async": false, - "api_key": null, - "force_timeout": 600, - "logger_fn": null, - "verbose": false, - "custom_llm_provider": "bedrock", - "api_base": null, - "litellm_call_id": "902640b5-4a26-4629-932d-35d6cf4e1635", - "model_alias_map": {}, - "completion_call_id": null, - "metadata": null, - "stream_response": {} - }, - "input": null, - "api_key": null, - "additional_args": {}, - "log_event_type": "post_api_call", - "original_response": "", - "complete_streaming_response": { - "id": "chatcmpl-1757e5ea-71f2-44a2-9d8d-1ba8238a7c99", - "object": "chat.completion.chunk", - "created": 1697160117, - "model": "anthropic.claude-v2", - "choices": [ - { - "index": 0, - "message": { - "role": "assistant", - "content": " I'm Claude, an AI assistant created by Anthropic. I don't actually have general knowledge about the world. I'm an AI conversational model trained by Anthropic to be helpful, harmless, and honest." 
- }, - "finish_reason": "stop_sequence" - } - ] - } - } - } -} \ No newline at end of file diff --git a/litellm/proxy/litellm-proxy/cost.log b/litellm/proxy/litellm-proxy/cost.log deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/litellm/proxy/litellm-proxy/costs.json b/litellm/proxy/litellm-proxy/costs.json deleted file mode 100644 index 8211cec220..0000000000 --- a/litellm/proxy/litellm-proxy/costs.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "Oct-12-2023": { - "claude-2": { - "cost": 0.02365918, - "num_requests": 1 - } - } -} \ No newline at end of file diff --git a/litellm/proxy/litellm-proxy/proxy_cli.py b/litellm/proxy/litellm-proxy/proxy_cli.py deleted file mode 100644 index b32d836304..0000000000 --- a/litellm/proxy/litellm-proxy/proxy_cli.py +++ /dev/null @@ -1,142 +0,0 @@ -import click -import subprocess, traceback -import os, sys -import random -from dotenv import load_dotenv - -load_dotenv() -from importlib import resources -import shutil -telemetry = None - -def run_ollama_serve(): - command = ['ollama', 'serve'] - - with open(os.devnull, 'w') as devnull: - process = subprocess.Popen(command, stdout=devnull, stderr=devnull) - -def clone_subfolder(repo_url, subfolder, destination): - # Clone the full repo - repo_name = repo_url.split('/')[-1] - repo_master = os.path.join(destination, "repo_master") - subprocess.run(['git', 'clone', repo_url, repo_master]) - - # Move into the subfolder - subfolder_path = os.path.join(repo_master, subfolder) - - # Copy subfolder to destination - for file_name in os.listdir(subfolder_path): - source = os.path.join(subfolder_path, file_name) - if os.path.isfile(source): - shutil.copy(source, destination) - else: - dest_path = os.path.join(destination, file_name) - shutil.copytree(source, dest_path) - - # Remove cloned repo folder - subprocess.run(['rm', '-rf', os.path.join(destination, "repo_master")]) - feature_telemetry(feature="create-proxy") - -def is_port_in_use(port): - import socket - with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: - return s.connect_ex(('localhost', port)) == 0 - -@click.command() -@click.option('--host', default='0.0.0.0', help='Host for the server to listen on.') -@click.option('--port', default=8000, help='Port to bind the server to.') -@click.option('--api_base', default=None, help='API base URL.') -@click.option('--model', default=None, help='The model name to pass to litellm expects') -@click.option('--deploy', is_flag=True, type=bool, help='Get a deployed proxy endpoint - api.litellm.ai') -@click.option('--debug', default=False, is_flag=True, type=bool, help='To debug the input') -@click.option('--temperature', default=None, type=float, help='Set temperature for the model') -@click.option('--max_tokens', default=None, type=int, help='Set max tokens for the model') -@click.option('--drop_params', is_flag=True, help='Drop any unmapped params') -@click.option('--create_proxy', is_flag=True, help='Creates a local OpenAI-compatible server template') -@click.option('--add_function_to_prompt', is_flag=True, help='If function passed but unsupported, pass it as prompt') -@click.option('--max_budget', default=None, type=float, help='Set max budget for API calls - works for hosted models like OpenAI, TogetherAI, Anthropic, etc.`') -@click.option('--telemetry', default=True, type=bool, help='Helps us know if people are using this feature. 
Turn this off by doing `--telemetry False`') -@click.option('--test', flag_value=True, help='proxy chat completions url to make a test request to') -@click.option('--local', is_flag=True, default=False, help='for local debugging') -@click.option('--cost', is_flag=True, default=False, help='for viewing cost logs') -def run_server(host, port, api_base, model, deploy, debug, temperature, max_tokens, drop_params, create_proxy, add_function_to_prompt, max_budget, telemetry, test, local, cost): - global feature_telemetry - if local: - from proxy_server import app, initialize, deploy_proxy, print_cost_logs, usage_telemetry - debug = True - else: - try: - from .proxy_server import app, initialize, deploy_proxy, print_cost_logs, usage_telemetry - except ImportError as e: - from proxy_server import app, initialize, deploy_proxy, print_cost_logs, usage_telemetry - feature_telemetry = usage_telemetry - if create_proxy == True: - repo_url = 'https://github.com/BerriAI/litellm' - subfolder = 'litellm/proxy' - destination = os.path.join(os.getcwd(), 'litellm-proxy') - - clone_subfolder(repo_url, subfolder, destination) - - return - if deploy == True: - print(f"\033[32mLiteLLM: Deploying your proxy to api.litellm.ai\033[0m\n") - print(f"\033[32mLiteLLM: Deploying proxy for model: {model}\033[0m\n") - url = deploy_proxy(model, api_base, debug, temperature, max_tokens, telemetry, deploy) - print(f"\033[32mLiteLLM: Deploy Successfull\033[0m\n") - print(f"\033[32mLiteLLM: Your deployed url: {url}\033[0m\n") - - print(f"\033[32mLiteLLM: Test your URL using the following: \"litellm --test {url}\"\033[0m") - return - if model and "ollama" in model: - run_ollama_serve() - if cost == True: - print_cost_logs() - return - if test != False: - click.echo('LiteLLM: Making a test ChatCompletions request to your proxy') - import openai - if test == True: # flag value set - api_base = f"http://{host}:{port}" - else: - api_base = test - openai.api_base = api_base - openai.api_key = "temp-key" - print(openai.api_base) - - response = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages = [ - { - "role": "user", - "content": "this is a test request, acknowledge that you got it" - } - ]) - click.echo(f'LiteLLM: response from proxy {response}') - - click.echo(f'LiteLLM: response from proxy with streaming {response}') - response = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages = [ - { - "role": "user", - "content": "this is a test request, acknowledge that you got it" - } - ], - stream=True, - ) - for chunk in response: - click.echo(f'LiteLLM: streaming response from proxy {chunk}') - return - else: - initialize(model, api_base, debug, temperature, max_tokens, max_budget, telemetry, drop_params, add_function_to_prompt) - try: - import uvicorn - except: - raise ImportError("Uvicorn needs to be imported. 
Run - `pip install uvicorn`") - print(f"\033[32mLiteLLM: Deployed Proxy Locally\033[0m\n") - print(f"\033[32mLiteLLM: Test your local endpoint with: \"litellm --test\" [In a new terminal tab]\033[0m\n") - print(f"\033[32mLiteLLM: Deploy your proxy using the following: \"litellm --model claude-instant-1 --deploy\" Get an https://api.litellm.ai/chat/completions endpoint \033[0m\n") - - if port == 8000 and is_port_in_use(port): - port = random.randint(1024, 49152) - uvicorn.run(app, host=host, port=port) - - -if __name__ == "__main__": - run_server() diff --git a/litellm/proxy/litellm-proxy/proxy_server.py b/litellm/proxy/litellm-proxy/proxy_server.py deleted file mode 100644 index 1aaa1472f3..0000000000 --- a/litellm/proxy/litellm-proxy/proxy_server.py +++ /dev/null @@ -1,461 +0,0 @@ -import sys, os, platform, time, copy -import threading -import shutil, random, traceback -# sys.path.insert( -# 0, os.path.abspath("../..") -# ) # Adds the parent directory to the system path - for litellm local dev - - -try: - import uvicorn - import fastapi - import tomli as tomllib - import appdirs -except ImportError: - import subprocess - import sys - - subprocess.check_call([sys.executable, "-m", "pip", "install", "uvicorn", "fastapi", "tomli", "appdirs"]) - import uvicorn - import fastapi - import tomli as tomllib - import appdirs - -import random -list_of_messages = [ - "'The thing I wish you improved is...'", - "'A feature I really want is...'", - "'The worst thing about this product is...'", - "'This product would be better if...'", - "'I don't like how this works...'", - "'It would help me if you could add...'", - "'This feature doesn't meet my needs because...'", - "'I get frustrated when the product...'", -] - -def generate_feedback_box(): - box_width = 60 - - # Select a random message - message = random.choice(list_of_messages) - - print() - print('\033[1;37m' + '#' + '-'*box_width + '#\033[0m') - print('\033[1;37m' + '#' + ' '*box_width + '#\033[0m') - print('\033[1;37m' + '# {:^59} #\033[0m'.format(message)) - print('\033[1;37m' + '# {:^59} #\033[0m'.format('https://github.com/BerriAI/litellm/issues/new')) - print('\033[1;37m' + '#' + ' '*box_width + '#\033[0m') - print('\033[1;37m' + '#' + '-'*box_width + '#\033[0m') - print() - print(' Thank you for using LiteLLM! - Krrish & Ishaan') - print() - print() - -generate_feedback_box() - - -print() -print("\033[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new\033[0m") -print() -print("\033[1;34mDocs: https://docs.litellm.ai/docs/proxy_server\033[0m") -print() - -import litellm -from fastapi import FastAPI, Request -from fastapi.routing import APIRouter -from fastapi.responses import StreamingResponse, FileResponse -import json -import logging - -app = FastAPI() -router = APIRouter() - -user_api_base = None -user_model = None -user_debug = False -user_max_tokens = None -user_temperature = None -user_telemetry = True -user_config = None -config_filename = "secrets.toml" -config_dir = os.getcwd() -user_config_path = os.path.join(config_dir, config_filename) -log_file = 'api_log.json' -#### HELPER FUNCTIONS #### -def print_verbose(print_statement): - global user_debug - if user_debug: - print(print_statement) - -def usage_telemetry(feature: str): # helps us know if people are using this feature. 
Set `litellm --telemetry False` to your cli call to turn this off - print(f"user_telemtry: {user_telemetry}") - if user_telemetry: - print(f"feature telemetry: {feature}") - data = { - "feature": feature # "local_proxy_server" - } - threading.Thread(target=litellm.utils.litellm_telemetry, args=(data,), daemon=True).start() - -def load_config(): - try: - global user_config, user_api_base, user_max_tokens, user_temperature, user_model - # As the .env file is typically much simpler in structure, we use load_dotenv here directly - with open(user_config_path, "rb") as f: - user_config = tomllib.load(f) - - ## load keys - if "keys" in user_config: - for key in user_config["keys"]: - if key == "HUGGINGFACE_API_KEY": - litellm.huggingface_key = user_config["keys"][key] - elif key == "OPENAI_API_KEY": - litellm.openai_key = user_config["keys"][key] - elif key == "TOGETHERAI_API_KEY": - litellm.togetherai_api_key = user_config["keys"][key] - elif key == "NLP_CLOUD_API_KEY": - litellm.nlp_cloud_key = user_config["keys"][key] - elif key == "ANTHROPIC_API_KEY": - litellm.anthropic_key = user_config["keys"][key] - elif key == "REPLICATE_API_KEY": - litellm.replicate_key = user_config["keys"][key] - elif key == "AWS_ACCESS_KEY_ID": - os.environ["AWS_ACCESS_KEY_ID"] = user_config["keys"][key] - elif key == "AWS_SECRET_ACCESS_KEY": - os.environ["AWS_SECRET_ACCESS_KEY"] = user_config["keys"][key] - - ## settings - litellm.add_function_to_prompt = user_config["general"].get("add_function_to_prompt", True) # by default add function to prompt if unsupported by provider - litellm.drop_params = user_config["general"].get("drop_params", True) # by default drop params if unsupported by provider - - ## load model config - to set this run `litellm --config` - model_config = None - if user_model in user_config["model"]: - model_config = user_config["model"][user_model] - - print_verbose(f"user_config: {user_config}") - print_verbose(f"model_config: {model_config}") - if model_config is None: - return - user_model = model_config["model_name"] # raise an error if this isn't set when user runs either `litellm --model local_model` or `litellm --model hosted_model` - print_verbose(f"user_model: {user_model}") - - - user_max_tokens = model_config.get("max_tokens", None) - user_temperature = model_config.get("temperature", None) - user_api_base = model_config.get("api_base", None) - - ## custom prompt template - if "prompt_template" in model_config: - model_prompt_template = model_config["prompt_template"] - if len(model_prompt_template.keys()) > 0: # if user has initialized this at all - litellm.register_prompt_template( - model=user_model, - initial_prompt_value=model_prompt_template.get("MODEL_PRE_PROMPT", ""), - roles={ - "system": { - "pre_message": model_prompt_template.get("MODEL_SYSTEM_MESSAGE_START_TOKEN", ""), - "post_message": model_prompt_template.get("MODEL_SYSTEM_MESSAGE_END_TOKEN", ""), - }, - "user": { - "pre_message": model_prompt_template.get("MODEL_USER_MESSAGE_START_TOKEN", ""), - "post_message": model_prompt_template.get("MODEL_USER_MESSAGE_END_TOKEN", ""), - }, - "assistant": { - "pre_message": model_prompt_template.get("MODEL_ASSISTANT_MESSAGE_START_TOKEN", ""), - "post_message": model_prompt_template.get("MODEL_ASSISTANT_MESSAGE_END_TOKEN", ""), - } - }, - final_prompt_value=model_prompt_template.get("MODEL_POST_PROMPT", ""), - ) - except Exception as e: - traceback.print_exc() - -def initialize(model, api_base, debug, temperature, max_tokens, max_budget, telemetry, drop_params, 
add_function_to_prompt): - global user_model, user_api_base, user_debug, user_max_tokens, user_temperature, user_telemetry - user_model = model - user_debug = debug - - load_config() - user_api_base = api_base - user_max_tokens = max_tokens - user_temperature = temperature - user_telemetry = telemetry - usage_telemetry(feature="local_proxy_server") - if drop_params == True: - litellm.drop_params = True - if add_function_to_prompt == True: - litellm.add_function_to_prompt = True - if max_budget: - litellm.max_budget = max_budget - - -def deploy_proxy(model, api_base, debug, temperature, max_tokens, telemetry, deploy): - import requests - # Load .env file - - # Prepare data for posting - data = { - "model": model, - "api_base": api_base, - "temperature": temperature, - "max_tokens": max_tokens, - } - - # print(data) - - # Make post request to the url - url = "https://litellm-api.onrender.com/deploy" - # url = "http://0.0.0.0:4000/deploy" - - with open(".env", "w") as env_file: - for row in data: - env_file.write(f"{row.upper()}='{data[row]}'\n") - env_file.write("\n\n") - for key in os.environ: - value = os.environ[key] - env_file.write(f"{key}='{value}'\n") - # env_file.write(str(os.environ)) - - files = {"file": open(".env", "rb")} - # print(files) - - - - response = requests.post(url, data=data, files=files) - # print(response) - # Check the status of the request - if response.status_code != 200: - return f"Request to url: {url} failed with status: {response.status_code}" - - # Reading the response - response_data = response.json() - # print(response_data) - url = response_data["url"] - # # Do something with response_data - - return url - - -# for streaming -def data_generator(response): - print_verbose("inside generator") - for chunk in response: - print_verbose(f"returned chunk: {chunk}") - yield f"data: {json.dumps(chunk)}\n\n" - -def track_cost_callback( - kwargs, # kwargs to completion - completion_response, # response from completion - start_time, end_time # start/end time -): - # track cost like this - # { - # "Oct12": { - # "gpt-4": 10, - # "claude-2": 12.01, - # }, - # "Oct 15": { - # "ollama/llama2": 0.0, - # "gpt2": 1.2 - # } - # } - try: - - # for streaming responses - if "complete_streaming_response" in kwargs: - # for tracking streaming cost we pass the "messages" and the output_text to litellm.completion_cost - completion_response=kwargs["complete_streaming_response"] - input_text = kwargs["messages"] - output_text = completion_response["choices"][0]["message"]["content"] - response_cost = litellm.completion_cost( - model = kwargs["model"], - messages = input_text, - completion=output_text - ) - model = kwargs['model'] - print("streaming response_cost", response_cost) - - # for non streaming responses - else: - # we pass the completion_response obj - if kwargs["stream"] != True: - response_cost = litellm.completion_cost(completion_response=completion_response) - print("regular response_cost", response_cost) - model = completion_response["model"] - - # read/write from json for storing daily model costs - cost_data = {} - try: - with open("costs.json") as f: - cost_data = json.load(f) - except FileNotFoundError: - cost_data = {} - import datetime - date = datetime.datetime.now().strftime("%b-%d-%Y") - if date not in cost_data: - cost_data[date] = {} - - if kwargs["model"] in cost_data[date]: - cost_data[date][kwargs["model"]]["cost"] += response_cost - cost_data[date][kwargs["model"]]["num_requests"] += 1 - else: - cost_data[date][kwargs["model"]] = { - "cost": response_cost, 
- "num_requests": 1 - } - - with open("costs.json", "w") as f: - json.dump(cost_data, f, indent=2) - - except: - pass - -def logger( - kwargs, # kwargs to completion - completion_response=None, # response from completion - start_time=None, - end_time=None # start/end time -): - log_event_type = kwargs['log_event_type'] - print(f"REACHES LOGGER: {log_event_type}") - try: - if log_event_type == 'pre_api_call': - inference_params = copy.deepcopy(kwargs) - timestamp = inference_params.pop('start_time') - dt_key = timestamp.strftime("%Y%m%d%H%M%S%f")[:23] - log_data = { - dt_key: { - 'pre_api_call': inference_params - } - } - - try: - with open(log_file, 'r') as f: - existing_data = json.load(f) - except FileNotFoundError: - existing_data = {} - - existing_data.update(log_data) - - with open(log_file, 'w') as f: - json.dump(existing_data, f, indent=2) - elif log_event_type == 'post_api_call': - print(f"post api call kwargs: {kwargs}") - if "stream" not in kwargs["optional_params"] or kwargs["optional_params"]["stream"] is False or kwargs.get("complete_streaming_response", False): - inference_params = copy.deepcopy(kwargs) - timestamp = inference_params.pop('start_time') - dt_key = timestamp.strftime("%Y%m%d%H%M%S%f")[:23] - - with open(log_file, 'r') as f: - existing_data = json.load(f) - - existing_data[dt_key]['post_api_call'] = inference_params - - with open(log_file, 'w') as f: - json.dump(existing_data, f, indent=2) - except: - traceback.print_exc() - -litellm.input_callback = [logger] -litellm.success_callback = [logger] -litellm.failure_callback = [logger] - -def litellm_completion(data, type): - try: - if user_model: - data["model"] = user_model - # override with user settings - if user_temperature: - data["temperature"] = user_temperature - if user_max_tokens: - data["max_tokens"] = user_max_tokens - if user_api_base: - data["api_base"] = user_api_base - if type == "completion": - response = litellm.text_completion(**data) - elif type == "chat_completion": - response = litellm.completion(**data) - if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses - return StreamingResponse(data_generator(response), media_type='text/event-stream') - print_verbose(f"response: {response}") - return response - except Exception as e: - if "Invalid response object from API" in str(e): - completion_call_details = {} - if user_model: - completion_call_details["model"] = user_model - else: - completion_call_details["model"] = data['model'] - - if user_api_base: - completion_call_details["api_base"] = user_api_base - else: - completion_call_details["api_base"] = None - print(f"\033[1;31mLiteLLM.Exception: Invalid API Call. Call details: Model: \033[1;37m{completion_call_details['model']}\033[1;31m; LLM Provider: \033[1;37m{e.llm_provider}\033[1;31m; Custom API Base - \033[1;37m{completion_call_details['api_base']}\033[1;31m\033[0m") - if completion_call_details["api_base"] == "http://localhost:11434": - print() - print("Trying to call ollama? 
Try `litellm --model ollama/llama2 --api_base http://localhost:11434`")
-                print()
-        else:
-            print(f"\033[1;31mLiteLLM.Exception: {str(e)}\033[0m")
-        return {"message": "An error occurred"}, 500
-
-#### API ENDPOINTS ####
-@router.get("/models") # if project requires model list
-def model_list():
-    if user_model != None:
-        return dict(
-            data=[{"id": user_model, "object": "model", "created": 1677610602, "owned_by": "openai"}],
-            object="list",
-        )
-    else:
-        all_models = litellm.model_list
-        return dict(
-            data = [{"id": model, "object": "model", "created": 1677610602, "owned_by": "openai"} for model in all_models],
-            object="list",
-        )
-
-@router.post("/completions")
-async def completion(request: Request):
-    data = await request.json()
-    return litellm_completion(data=data, type="completion")
-
-@router.post("/chat/completions")
-async def chat_completion(request: Request):
-    data = await request.json()
-    print(f"data passed in: {data}")
-    response = litellm_completion(data, type="chat_completion")
-    return response
-
-
-# V1 Endpoints - some apps expect a v1 endpoint - these call the regular function
-@router.post("/v1/completions")
-async def v1_completion(request: Request):
-    data = await request.json()
-    return litellm_completion(data=data, type="completion")
-
-@router.post("/v1/chat/completions")
-async def v1_chat_completion(request: Request):
-    data = await request.json()
-    print_verbose(f"data passed in: {data}")
-    response = litellm_completion(data, type="chat_completion")
-    return response
-
-def print_cost_logs():
-    with open('costs.json', 'r') as f:
-        # print this in green
-        print("\033[1;32m")
-        print(f.read())
-        print("\033[0m")
-    return
-
-@router.get("/ollama_logs")
-async def retrieve_server_log(request: Request):
-    filepath = os.path.expanduser('~/.ollama/logs/server.log')
-    return FileResponse(filepath)
-
-@router.get("/")
-async def home(request: Request):
-    return "LiteLLM: RUNNING"
-
-app.include_router(router)
\ No newline at end of file
diff --git a/litellm/proxy/litellm-proxy/secrets_template.toml b/litellm/proxy/litellm-proxy/secrets_template.toml
deleted file mode 100644
index 3297ce724a..0000000000
--- a/litellm/proxy/litellm-proxy/secrets_template.toml
+++ /dev/null
@@ -1,31 +0,0 @@
-[keys]
-# HUGGINGFACE_API_KEY="" # Uncomment to save your Hugging Face API key
-# OPENAI_API_KEY="" # Uncomment to save your OpenAI API Key
-# TOGETHERAI_API_KEY="" # Uncomment to save your TogetherAI API key
-# NLP_CLOUD_API_KEY="" # Uncomment to save your NLP Cloud API key
-# ANTHROPIC_API_KEY="" # Uncomment to save your Anthropic API key
-# REPLICATE_API_KEY="" # Uncomment to save your Replicate API key
-# AWS_ACCESS_KEY_ID = "" # Uncomment to save your Bedrock/Sagemaker access keys
-# AWS_SECRET_ACCESS_KEY = "" # Uncomment to save your Bedrock/Sagemaker access keys
-
-[general]
-# add_function_to_prompt = True # e.g: Ollama doesn't support functions, so add it to the prompt instead
-# drop_params = True # drop any params not supported by the provider (e.g. Ollama)
-
-[model."ollama/llama2"] # run via `litellm --model ollama/llama2`
-# max_tokens = "" # set max tokens for the model
-# temperature = "" # set temperature for the model
-# api_base = "" # set a custom api base for the model
-
-[model."ollama/llama2".prompt_template] # [OPTIONAL] LiteLLM can automatically formats the prompt - docs: https://docs.litellm.ai/docs/completion/prompt_formatting
-# MODEL_SYSTEM_MESSAGE_START_TOKEN = "[INST] <<SYS>>\n" # This does not need to be a token, can be any string
-# MODEL_SYSTEM_MESSAGE_END_TOKEN = "\n<</SYS>>\n [/INST]\n" # This does not need to be a token, can be any string
-
-# MODEL_USER_MESSAGE_START_TOKEN = "[INST] " # This does not need to be a token, can be any string
-# MODEL_USER_MESSAGE_END_TOKEN = " [/INST]\n" # Applies only to user messages. Can be any string.
-
-# MODEL_ASSISTANT_MESSAGE_START_TOKEN = "" # Applies only to assistant messages. Can be any string.
-# MODEL_ASSISTANT_MESSAGE_END_TOKEN = "\n" # Applies only to system messages. Can be any string.
-
-# MODEL_PRE_PROMPT = "You are a good bot" # Applied at the start of the prompt
-# MODEL_POST_PROMPT = "Now answer as best as you can" # Applied at the end of the prompt
\ No newline at end of file