forked from phoenix/litellm-mirror
docs(proxy): added readme
parent b28c055896
commit 4f172101df
3 changed files with 61 additions and 10 deletions
litellm/proxy/README.md (new file, 47 additions)
@@ -0,0 +1,47 @@
# litellm-proxy

A local, fast, and lightweight **OpenAI-compatible server** to call 100+ LLM APIs.

## usage

```shell
$ pip install litellm
```

```shell
$ litellm --model ollama/codellama

#INFO: Ollama running on http://0.0.0.0:8000
```

## replace openai base

```python
import openai

openai.api_base = "http://0.0.0.0:8000"

print(openai.ChatCompletion.create(model="test", messages=[{"role": "user", "content": "Hey!"}]))
```

[**See how to call Huggingface, Bedrock, TogetherAI, Anthropic, etc.**](https://docs.litellm.ai/docs/proxy_server)

## configure proxy

To save API keys, change the model prompt, etc., you'll need to create a local instance of the proxy:

```shell
$ litellm --create-proxy
```

This will create a local project called `litellm-proxy` in your current directory, which contains:

* **proxy_cli.py**: Runs the proxy
* **proxy_server.py**: Contains the API-calling logic
    - `/chat/completions`: receives the `openai.ChatCompletion.create` call.
    - `/completions`: receives the `openai.Completion.create` call.
    - `/models`: receives the `openai.Model.list()` call.
* **secrets.toml**: Stores your API keys, model configs, etc.

Run it with:

```shell
$ cd litellm-proxy
```

```shell
$ python proxy_cli.py --model ollama/llama # replace with your model name
```
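
Once the proxy is running, the endpoints listed above can also be called directly over HTTP. A minimal sketch using `requests`, assuming the proxy is up on the default `http://0.0.0.0:8000` and was started with `ollama/codellama`:

```python
import requests

# assumes the proxy was started with: litellm --model ollama/codellama
response = requests.post(
    "http://0.0.0.0:8000/chat/completions",
    json={
        "model": "ollama/codellama",  # whichever model the proxy was started with
        "messages": [{"role": "user", "content": "Hey!"}],
    },
)
print(response.json())
```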
litellm/proxy/proxy_cli.py

@@ -7,6 +7,7 @@ from dotenv import load_dotenv
 load_dotenv()
 from importlib import resources
 import shutil
+telemetry = None
 
 def run_ollama_serve():
     command = ['ollama', 'serve']
@@ -15,7 +16,6 @@ def run_ollama_serve():
         process = subprocess.Popen(command, stdout=devnull, stderr=devnull)
 
 def clone_subfolder(repo_url, subfolder, destination):
     # Clone the full repo
     repo_name = repo_url.split('/')[-1]
     repo_master = os.path.join(destination, "repo_master")
@@ -35,6 +35,7 @@ def clone_subfolder(repo_url, subfolder, destination):
 
     # Remove cloned repo folder
     subprocess.run(['rm', '-rf', os.path.join(destination, "repo_master")])
+    feature_telemetry(feature="create-proxy")
 
 def is_port_in_use(port):
     import socket
@@ -59,15 +60,16 @@ def is_port_in_use(port):
 @click.option('--local', is_flag=True, default=False, help='for local debugging')
 @click.option('--cost', is_flag=True, default=False, help='for viewing cost logs')
 def run_server(host, port, api_base, model, deploy, debug, temperature, max_tokens, drop_params, create_proxy, add_function_to_prompt, max_budget, telemetry, test, local, cost):
+    global feature_telemetry
     if local:
-        from proxy_server import app, initialize, deploy_proxy, print_cost_logs
+        from proxy_server import app, initialize, deploy_proxy, print_cost_logs, usage_telemetry
         debug = True
     else:
         try:
-            from .proxy_server import app, initialize, deploy_proxy, print_cost_logs
+            from .proxy_server import app, initialize, deploy_proxy, print_cost_logs, usage_telemetry
         except ImportError as e:
-            from proxy_server import app, initialize, deploy_proxy, print_cost_logs
+            from proxy_server import app, initialize, deploy_proxy, print_cost_logs, usage_telemetry
+    feature_telemetry = usage_telemetry
     if create_proxy == True:
         repo_url = 'https://github.com/BerriAI/litellm'
         subfolder = 'litellm/proxy'
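
The CLI change above binds a module-level `feature_telemetry` hook to whichever `usage_telemetry` import succeeded, so later code paths such as `--create-proxy` can report a feature tag without re-importing the server module. A minimal sketch of that late-binding pattern in isolation, with hypothetical stand-in names rather than the real litellm functions:

```python
# Sketch of the late-bound telemetry hook pattern (hypothetical names).
feature_telemetry = None  # module-level slot, filled once the server module is importable

def usage_telemetry(feature: str):
    # stand-in for the real telemetry sender
    print(f"telemetry: {feature}")

def run_server():
    global feature_telemetry
    feature_telemetry = usage_telemetry  # bind the hook once at startup

def create_proxy():
    if feature_telemetry is not None:
        feature_telemetry(feature="create-proxy")  # later code paths just call the hook

run_server()
create_proxy()
```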

litellm/proxy/proxy_server.py

@@ -75,7 +75,7 @@ user_model = None
 user_debug = False
 user_max_tokens = None
 user_temperature = None
-user_telemetry = False
+user_telemetry = True
 user_config = None
 config_filename = "secrets.toml"
 config_dir = os.getcwd()
@@ -87,12 +87,14 @@ def print_verbose(print_statement):
     if user_debug:
         print(print_statement)
 
-def usage_telemetry(): # helps us know if people are using this feature. Set `litellm --telemetry False` to your cli call to turn this off
+def usage_telemetry(feature: str): # helps us know if people are using this feature. Set `litellm --telemetry False` to your cli call to turn this off
+    print(f"user_telemtry: {user_telemetry}")
     if user_telemetry:
+        print(f"feature telemetry: {feature}")
         data = {
-            "feature": "local_proxy_server"
+            "feature": feature # "local_proxy_server"
         }
-        threading.Thread(target=litellm.utils.litellm_telemetry, args=(data,)).start()
+        threading.Thread(target=litellm.utils.litellm_telemetry, args=(data,), daemon=True).start()
 
 def load_config():
     try:

@@ -174,7 +176,7 @@ def initialize(model, api_base, debug, temperature, max_tokens, max_budget, tele
     user_max_tokens = max_tokens
     user_temperature = temperature
     user_telemetry = telemetry
-    usage_telemetry()
+    usage_telemetry(feature="local_proxy_server")
     if drop_params == True:
         litellm.drop_params = True
     if add_function_to_prompt == True: