docs(proxy): added readme

Krrish Dholakia 2023-10-12 21:09:40 -07:00
parent b28c055896
commit 4f172101df
3 changed files with 61 additions and 10 deletions

litellm/proxy/README.md Normal file

@@ -0,0 +1,47 @@
# litellm-proxy
A local, fast, and lightweight **OpenAI-compatible server** to call 100+ LLM APIs.
## usage
```shell
$ pip install litellm
```
```shell
$ litellm --model ollama/codellama
#INFO: Ollama running on http://0.0.0.0:8000
```
## replace openai base
```python
import openai
openai.api_base = "http://0.0.0.0:8000"
print(openai.ChatCompletion.create(model="test", messages=[{"role":"user", "content":"Hey!"}]))
```
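Depending on your `openai` Python SDK version, the client may also refuse to send a request unless an API key is set, even though the request is only going to your local proxy; a minimal sketch with a placeholder key (the key value is arbitrary):
```python
import openai

openai.api_base = "http://0.0.0.0:8000"
openai.api_key = "anything"  # placeholder; only set so the SDK doesn't complain

print(openai.ChatCompletion.create(model="test", messages=[{"role":"user", "content":"Hey!"}]))
```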
[**See how to call Huggingface, Bedrock, TogetherAI, Anthropic, etc.**](https://docs.litellm.ai/docs/proxy_server)
## configure proxy
To save API keys, change the model prompt, etc., you'll need to create a local instance of the proxy:
```shell
$ litellm --create-proxy
```
This will create a local project called `litellm-proxy` in your current directory, which contains:
* **proxy_cli.py**: Runs the proxy
* **proxy_server.py**: Contains the API calling logic
    - `/chat/completions`: receives `openai.ChatCompletion.create` calls.
    - `/completions`: receives `openai.Completion.create` calls.
    - `/models`: receives `openai.Model.list()` calls.
* **secrets.toml**: Stores your API keys, model configs, etc.
Run it with:
```shell
$ cd litellm-proxy
```
```shell
$ python proxy_cli.py --model ollama/llama # replace with your model name
```
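Once the proxy is running, you can also hit its routes directly. A minimal smoke test against `/chat/completions` (assumes the default port 8000 and whichever model you launched the proxy with):
```python
import requests

# POST an OpenAI-style chat request to the local proxy (default port 8000 assumed)
response = requests.post(
    "http://0.0.0.0:8000/chat/completions",
    json={
        "model": "ollama/llama",  # replace with the model you started the proxy with
        "messages": [{"role": "user", "content": "Hey!"}],
    },
)
print(response.json())
```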

litellm/proxy/proxy_cli.py

@@ -7,6 +7,7 @@ from dotenv import load_dotenv
load_dotenv()
from importlib import resources
import shutil
telemetry = None
def run_ollama_serve():
command = ['ollama', 'serve']
@@ -15,7 +16,6 @@ def run_ollama_serve():
process = subprocess.Popen(command, stdout=devnull, stderr=devnull)
def clone_subfolder(repo_url, subfolder, destination):
# Clone the full repo
repo_name = repo_url.split('/')[-1]
repo_master = os.path.join(destination, "repo_master")
@@ -35,6 +35,7 @@ def clone_subfolder(repo_url, subfolder, destination):
# Remove cloned repo folder
subprocess.run(['rm', '-rf', os.path.join(destination, "repo_master")])
feature_telemetry(feature="create-proxy")
def is_port_in_use(port):
import socket
@@ -59,15 +60,16 @@ def is_port_in_use(port):
@click.option('--local', is_flag=True, default=False, help='for local debugging')
@click.option('--cost', is_flag=True, default=False, help='for viewing cost logs')
def run_server(host, port, api_base, model, deploy, debug, temperature, max_tokens, drop_params, create_proxy, add_function_to_prompt, max_budget, telemetry, test, local, cost):
global feature_telemetry
if local:
from proxy_server import app, initialize, deploy_proxy, print_cost_logs
from proxy_server import app, initialize, deploy_proxy, print_cost_logs, usage_telemetry
debug = True
else:
try:
from .proxy_server import app, initialize, deploy_proxy, print_cost_logs
from .proxy_server import app, initialize, deploy_proxy, print_cost_logs, usage_telemetry
except ImportError as e:
from proxy_server import app, initialize, deploy_proxy, print_cost_logs
from proxy_server import app, initialize, deploy_proxy, print_cost_logs, usage_telemetry
feature_telemetry = usage_telemetry
if create_proxy == True:
repo_url = 'https://github.com/BerriAI/litellm'
subfolder = 'litellm/proxy'

litellm/proxy/proxy_server.py

@@ -75,7 +75,7 @@ user_model = None
user_debug = False
user_max_tokens = None
user_temperature = None
user_telemetry = False
user_telemetry = True
user_config = None
config_filename = "secrets.toml"
config_dir = os.getcwd()
@@ -87,12 +87,14 @@ def print_verbose(print_statement):
if user_debug:
print(print_statement)
def usage_telemetry(): # helps us know if people are using this feature. Set `litellm --telemetry False` to your cli call to turn this off
def usage_telemetry(feature: str): # helps us know if people are using this feature. Set `litellm --telemetry False` to your cli call to turn this off
print(f"user_telemtry: {user_telemetry}")
if user_telemetry:
print(f"feature telemetry: {feature}")
data = {
"feature": "local_proxy_server"
"feature": feature # "local_proxy_server"
}
threading.Thread(target=litellm.utils.litellm_telemetry, args=(data,)).start()
threading.Thread(target=litellm.utils.litellm_telemetry, args=(data,), daemon=True).start()
def load_config():
try:
@@ -174,7 +176,7 @@ def initialize(model, api_base, debug, temperature, max_tokens, max_budget, tele
user_max_tokens = max_tokens
user_temperature = temperature
user_telemetry = telemetry
usage_telemetry()
usage_telemetry(feature="local_proxy_server")
if drop_params == True:
litellm.drop_params = True
if add_function_to_prompt == True:
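
The `proxy_server.py` change above threads the feature name through `usage_telemetry` and moves the reporting call onto a daemon thread. A standalone sketch of that pattern, with illustrative names in place of litellm's internals (`send_telemetry` here stands in for `litellm.utils.litellm_telemetry`):
```python
import threading

user_telemetry = True  # flipped off via `litellm --telemetry False`

def send_telemetry(payload: dict):
    # Stand-in for litellm.utils.litellm_telemetry: report the payload somewhere.
    print(f"telemetry: {payload}")

def usage_telemetry(feature: str):
    # Fire-and-forget on a daemon thread: it never blocks a request and
    # can't keep the process alive on shutdown; skipped entirely if disabled.
    if user_telemetry:
        threading.Thread(
            target=send_telemetry,
            args=({"feature": feature},),
            daemon=True,
        ).start()

usage_telemetry(feature="local_proxy_server")
```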