diff --git a/litellm/proxy/README.md b/litellm/proxy/README.md
new file mode 100644
index 000000000..413c55b2c
--- /dev/null
+++ b/litellm/proxy/README.md
@@ -0,0 +1,47 @@
+# litellm-proxy
+
+A local, fast, and lightweight **OpenAI-compatible server** to call 100+ LLM APIs.
+
+## usage
+
+```shell
+$ pip install litellm
+```
+```shell
+$ litellm --model ollama/codellama
+
+#INFO: Ollama running on http://0.0.0.0:8000
+```
+
+## replace openai base
+```python
+import openai
+
+openai.api_base = "http://0.0.0.0:8000"
+
+print(openai.ChatCompletion.create(model="test", messages=[{"role":"user", "content":"Hey!"}]))
+```
+
+[**See how to call Huggingface, Bedrock, TogetherAI, Anthropic, etc.**](https://docs.litellm.ai/docs/proxy_server)
+
+## configure proxy
+
+To save API keys, change the model prompt, etc., you'll need to create a local instance of the proxy:
+```shell
+$ litellm --create-proxy
+```
+This will create a local project called `litellm-proxy` in your current directory, which contains:
+* **proxy_cli.py**: Runs the proxy
+* **proxy_server.py**: Contains the API calling logic
+    - `/chat/completions`: receives `openai.ChatCompletion.create` calls.
+    - `/completions`: receives `openai.Completion.create` calls.
+    - `/models`: receives `openai.Model.list()` calls.
+* **secrets.toml**: Stores your API keys, model configs, etc.
+
+Run it with:
+```shell
+$ cd litellm-proxy
+```
+```shell
+$ python proxy_cli.py --model ollama/llama # replace with your model name
+```
\ No newline at end of file
diff --git a/litellm/proxy/proxy_cli.py b/litellm/proxy/proxy_cli.py
index 19b105a1a..b32d83630
--- a/litellm/proxy/proxy_cli.py
+++ b/litellm/proxy/proxy_cli.py
@@ -7,6 +7,7 @@ from dotenv import load_dotenv
 load_dotenv()
 from importlib import resources
 import shutil
+telemetry = None
 
 def run_ollama_serve():
     command = ['ollama', 'serve']
@@ -15,7 +16,6 @@ def run_ollama_serve():
         process = subprocess.Popen(command, stdout=devnull, stderr=devnull)
 
 def clone_subfolder(repo_url, subfolder, destination):
-  # Clone the full repo
   repo_name = repo_url.split('/')[-1]
   repo_master = os.path.join(destination, "repo_master")
@@ -35,6 +35,7 @@ def clone_subfolder(repo_url, subfolder, destination):
 
   # Remove cloned repo folder
   subprocess.run(['rm', '-rf', os.path.join(destination, "repo_master")])
+  feature_telemetry(feature="create-proxy")
 
 def is_port_in_use(port):
     import socket
@@ -59,15 +60,16 @@ def is_port_in_use(port):
 @click.option('--local', is_flag=True, default=False, help='for local debugging')
 @click.option('--cost', is_flag=True, default=False, help='for viewing cost logs')
 def run_server(host, port, api_base, model, deploy, debug, temperature, max_tokens, drop_params, create_proxy, add_function_to_prompt, max_budget, telemetry, test, local, cost):
+    global feature_telemetry
     if local:
-        from proxy_server import app, initialize, deploy_proxy, print_cost_logs
+        from proxy_server import app, initialize, deploy_proxy, print_cost_logs, usage_telemetry
         debug = True
     else:
         try:
-            from .proxy_server import app, initialize, deploy_proxy, print_cost_logs
+            from .proxy_server import app, initialize, deploy_proxy, print_cost_logs, usage_telemetry
         except ImportError as e:
-            from proxy_server import app, initialize, deploy_proxy, print_cost_logs
-
+            from proxy_server import app, initialize, deploy_proxy, print_cost_logs, usage_telemetry
+    feature_telemetry = usage_telemetry
     if create_proxy == True:
         repo_url = 'https://github.com/BerriAI/litellm'
         subfolder = 'litellm/proxy'
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 186f46252..6bede366a 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -75,7 +75,7 @@ user_model = None
 user_debug = False
 user_max_tokens = None
 user_temperature = None
-user_telemetry = False
+user_telemetry = True
 user_config = None
 config_filename = "secrets.toml"
 config_dir = os.getcwd()
@@ -87,12 +87,14 @@ def print_verbose(print_statement):
     if user_debug:
         print(print_statement)
 
-def usage_telemetry(): # helps us know if people are using this feature. Set `litellm --telemetry False` to your cli call to turn this off
+def usage_telemetry(feature: str): # helps us know if people are using this feature. Set `litellm --telemetry False` to your cli call to turn this off
+    print(f"user_telemetry: {user_telemetry}")
     if user_telemetry:
+        print(f"feature telemetry: {feature}")
         data = {
-            "feature": "local_proxy_server"
+            "feature": feature # "local_proxy_server"
         }
-        threading.Thread(target=litellm.utils.litellm_telemetry, args=(data,)).start()
+        threading.Thread(target=litellm.utils.litellm_telemetry, args=(data,), daemon=True).start()
 
 def load_config():
     try:
@@ -174,7 +176,7 @@ def initialize(model, api_base, debug, temperature, max_tokens, max_budget, tele
     user_max_tokens = max_tokens
     user_temperature = temperature
     user_telemetry = telemetry
-    usage_telemetry()
+    usage_telemetry(feature="local_proxy_server")
     if drop_params == True:
         litellm.drop_params = True
     if add_function_to_prompt == True:
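A note on the telemetry change above: `usage_telemetry` now takes a `feature` string and dispatches the send on a daemon thread, so reporting can neither block a request nor hold the process open at shutdown. Below is a minimal, self-contained sketch of that fire-and-forget pattern; `telemetry_enabled` and `send_event` are hypothetical stand-ins for the proxy's `user_telemetry` flag and `litellm.utils.litellm_telemetry`, not litellm APIs.

```python
import threading
import time

# Module-level opt-out flag, mirroring the patch's `user_telemetry` default of True;
# a CLI option such as `--telemetry False` would turn it off.
telemetry_enabled = True

def send_event(data: dict) -> None:
    # Hypothetical stand-in for the real sender used by the patch.
    print(f"sending telemetry event: {data}")

def usage_telemetry(feature: str) -> None:
    # Fire-and-forget: check the opt-out flag, then dispatch on a daemon thread so a
    # slow or failing send never blocks a request or keeps the interpreter alive at exit.
    if not telemetry_enabled:
        return
    data = {"feature": feature}
    threading.Thread(target=send_event, args=(data,), daemon=True).start()

if __name__ == "__main__":
    usage_telemetry(feature="local_proxy_server")
    usage_telemetry(feature="create-proxy")
    # Give the daemon threads a moment to run before the script exits;
    # in the proxy, the long-running server loop plays this role.
    time.sleep(0.5)
```

The `daemon=True` flag is the design choice that matters here: a non-daemon thread stuck on a hung network call would otherwise prevent the CLI process from exiting cleanly.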