forked from phoenix/litellm-mirror
docs(proxy): added readme
This commit is contained in:
parent b28c055896
commit 4f172101df
3 changed files with 61 additions and 10 deletions
litellm/proxy/README.md (new file, 47 additions)
@@ -0,0 +1,47 @@
# litellm-proxy

A local, fast, and lightweight **OpenAI-compatible server** to call 100+ LLM APIs.

## usage

```shell
$ pip install litellm
```

```shell
$ litellm --model ollama/codellama

#INFO: Ollama running on http://0.0.0.0:8000
```

## replace openai base

```python
import openai

openai.api_base = "http://0.0.0.0:8000"

print(openai.ChatCompletion.create(model="test", messages=[{"role":"user", "content":"Hey!"}]))
```
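Since the proxy speaks the OpenAI wire format, it can also be called directly over HTTP without the `openai` SDK. The snippet below is an illustrative sketch rather than documented behaviour: it assumes the server started above is listening on `http://0.0.0.0:8000` and that responses follow the standard OpenAI chat-completion JSON shape.

```python
import requests

# Hit the proxy's OpenAI-compatible chat route directly (sketch; route and
# payload shape assumed from the standard OpenAI chat-completion format).
resp = requests.post(
    "http://0.0.0.0:8000/chat/completions",
    json={
        "model": "test",
        "messages": [{"role": "user", "content": "Hey!"}],
    },
    timeout=60,
)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])
```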
[**See how to call Huggingface, Bedrock, TogetherAI, Anthropic, etc.**](https://docs.litellm.ai/docs/proxy_server)

## configure proxy

To save API keys, change the model prompt, etc., you'll need to create a local instance of the proxy:
```shell
$ litellm --create-proxy
```
This will create a local project called `litellm-proxy` in your current directory, that has:
* **proxy_cli.py**: Runs the proxy
* **proxy_server.py**: Contains the API calling logic
    - `/chat/completions`: receives the `openai.ChatCompletion.create` call.
    - `/completions`: receives the `openai.Completion.create` call.
    - `/models`: receives the `openai.Model.list()` call (see the sketch at the end of this section)
* **secrets.toml**: Stores your api keys, model configs, etc.

Run it by doing:
```shell
$ cd litellm-proxy
```
```shell
$ python proxy_cli.py --model ollama/llama # replace with your model name
```
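Once the server is running on the port shown earlier (8000 in the examples above), a quick way to confirm the routes listed before are live is to call `/models` through the same `openai` client. This is an illustrative sketch rather than part of the generated project; the placeholder API key, and the assumption that the local proxy does not validate it, are not from the README, and the exact response depends on the model you configured.

```python
import openai

# Point the client at the local proxy started above.
openai.api_base = "http://0.0.0.0:8000"
# The openai 0.x client requires some key to be set before making a call;
# the local proxy is assumed not to validate it (placeholder value).
openai.api_key = "not-needed-locally"

# `/models` receives this call, per the route list above.
print(openai.Model.list())
```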
litellm/proxy/proxy_cli.py

@@ -7,6 +7,7 @@ from dotenv import load_dotenv
load_dotenv()
from importlib import resources
import shutil
telemetry = None

def run_ollama_serve():
    command = ['ollama', 'serve']
@@ -15,7 +16,6 @@ def run_ollama_serve():
        process = subprocess.Popen(command, stdout=devnull, stderr=devnull)

def clone_subfolder(repo_url, subfolder, destination):

    # Clone the full repo
    repo_name = repo_url.split('/')[-1]
    repo_master = os.path.join(destination, "repo_master")
@@ -35,6 +35,7 @@ def clone_subfolder(repo_url, subfolder, destination):

    # Remove cloned repo folder
    subprocess.run(['rm', '-rf', os.path.join(destination, "repo_master")])
+    feature_telemetry(feature="create-proxy")

def is_port_in_use(port):
    import socket
@@ -59,15 +60,16 @@ def is_port_in_use(port):
@click.option('--local', is_flag=True, default=False, help='for local debugging')
@click.option('--cost', is_flag=True, default=False, help='for viewing cost logs')
def run_server(host, port, api_base, model, deploy, debug, temperature, max_tokens, drop_params, create_proxy, add_function_to_prompt, max_budget, telemetry, test, local, cost):
    global feature_telemetry
    if local:
-        from proxy_server import app, initialize, deploy_proxy, print_cost_logs
+        from proxy_server import app, initialize, deploy_proxy, print_cost_logs, usage_telemetry
        debug = True
    else:
        try:
-            from .proxy_server import app, initialize, deploy_proxy, print_cost_logs
+            from .proxy_server import app, initialize, deploy_proxy, print_cost_logs, usage_telemetry
        except ImportError as e:
-            from proxy_server import app, initialize, deploy_proxy, print_cost_logs
+            from proxy_server import app, initialize, deploy_proxy, print_cost_logs, usage_telemetry
    feature_telemetry = usage_telemetry
    if create_proxy == True:
        repo_url = 'https://github.com/BerriAI/litellm'
        subfolder = 'litellm/proxy'
litellm/proxy/proxy_server.py

@@ -75,7 +75,7 @@ user_model = None
user_debug = False
user_max_tokens = None
user_temperature = None
-user_telemetry = False
+user_telemetry = True
user_config = None
config_filename = "secrets.toml"
config_dir = os.getcwd()
@@ -87,12 +87,14 @@ def print_verbose(print_statement):
    if user_debug:
        print(print_statement)

-def usage_telemetry(): # helps us know if people are using this feature. Set `litellm --telemetry False` to your cli call to turn this off
+def usage_telemetry(feature: str): # helps us know if people are using this feature. Set `litellm --telemetry False` to your cli call to turn this off
    print(f"user_telemtry: {user_telemetry}")
    if user_telemetry:
+        print(f"feature telemetry: {feature}")
        data = {
-            "feature": "local_proxy_server"
+            "feature": feature # "local_proxy_server"
        }
-        threading.Thread(target=litellm.utils.litellm_telemetry, args=(data,)).start()
+        threading.Thread(target=litellm.utils.litellm_telemetry, args=(data,), daemon=True).start()

def load_config():
    try:
@@ -174,7 +176,7 @@ def initialize(model, api_base, debug, temperature, max_tokens, max_budget, tele
    user_max_tokens = max_tokens
    user_temperature = temperature
    user_telemetry = telemetry
-    usage_telemetry()
+    usage_telemetry(feature="local_proxy_server")
    if drop_params == True:
        litellm.drop_params = True
    if add_function_to_prompt == True: