Merge pull request #2416 from BerriAI/litellm_use_consistent_port
(docs) LiteLLM Proxy - use port 4000 in examples
Commit a1784284bb
33 changed files with 179 additions and 179 deletions
@@ -143,13 +143,13 @@ pip install 'litellm[proxy]'
 ```shell
 $ litellm --model huggingface/bigcode/starcoder

-#INFO: Proxy running on http://0.0.0.0:8000
+#INFO: Proxy running on http://0.0.0.0:4000
 ```

 ### Step 2: Make ChatCompletions Request to Proxy
 ```python
 import openai # openai v1.0.0+
-client = openai.OpenAI(api_key="anything",base_url="http://0.0.0.0:8000") # set proxy to base_url
+client = openai.OpenAI(api_key="anything",base_url="http://0.0.0.0:4000") # set proxy to base_url
 # request sent to model set on litellm proxy, `litellm --model`
 response = client.chat.completions.create(model="gpt-3.5-turbo", messages = [
 {
@@ -170,7 +170,7 @@ Set budgets and rate limits across multiple projects

 ### Request
 ```shell
-curl 'http://0.0.0.0:8000/key/generate' \
+curl 'http://0.0.0.0:4000/key/generate' \
 --header 'Authorization: Bearer sk-1234' \
 --header 'Content-Type: application/json' \
 --data-raw '{"models": ["gpt-3.5-turbo", "gpt-4", "claude-2"], "duration": "20m","metadata": {"user": "ishaan@berri.ai", "team": "core-infra"}}'
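For reference (not part of this diff): the key returned by `/key/generate` above is then used as the bearer token against the proxy on the new port. A minimal sketch, assuming the JSON response contains a `key` field and that `jq` is available:

```shell
# 1. generate a virtual key and extract the `key` field (field name assumed from the proxy docs)
KEY=$(curl -s 'http://0.0.0.0:4000/key/generate' \
  --header 'Authorization: Bearer sk-1234' \
  --header 'Content-Type: application/json' \
  --data-raw '{"models": ["gpt-3.5-turbo"], "duration": "20m"}' | jq -r '.key')

# 2. call the proxy with the generated key
curl 'http://0.0.0.0:4000/chat/completions' \
  --header "Authorization: Bearer $KEY" \
  --header 'Content-Type: application/json' \
  --data '{"model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": "hi"}]}'
```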
@@ -28,7 +28,7 @@ If `db.useStackgresOperator` is used (not yet implemented):
 | `imagePullSecrets` | Registry credentials for the LiteLLM and initContainer images. | `[]` |
 | `serviceAccount.create` | Whether or not to create a Kubernetes Service Account for this deployment. The default is `false` because LiteLLM has no need to access the Kubernetes API. | `false` |
 | `service.type` | Kubernetes Service type (e.g. `LoadBalancer`, `ClusterIP`, etc.) | `ClusterIP` |
-| `service.port` | TCP port that the Kubernetes Service will listen on. Also the TCP port within the Pod that the proxy will listen on. | `8000` |
+| `service.port` | TCP port that the Kubernetes Service will listen on. Also the TCP port within the Pod that the proxy will listen on. | `4000` |
 | `ingress.*` | See [values.yaml](./values.yaml) for example settings | N/A |
 | `proxy_config.*` | See [values.yaml](./values.yaml) for default settings. See [example_config_yaml](../../../litellm/proxy/example_config_yaml/) for configuration examples. | N/A |

@@ -76,7 +76,7 @@ When browsing to the URL published per the settings in `ingress.*`, you will
 be prompted for **Admin Configuration**. The **Proxy Endpoint** is the internal
 (from the `litellm` pod's perspective) URL published by the `<RELEASE>-litellm`
 Kubernetes Service. If the deployment uses the default settings for this
-service, the **Proxy Endpoint** should be set to `http://<RELEASE>-litellm:8000`.
+service, the **Proxy Endpoint** should be set to `http://<RELEASE>-litellm:4000`.

 The **Proxy Key** is the value specified for `masterkey` or, if a `masterkey`
 was not provided to the helm command line, the `masterkey` is a randomly
@@ -55,7 +55,7 @@ environmentSecrets: []

 service:
   type: ClusterIP
-  port: 8000
+  port: 4000

 ingress:
   enabled: false
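For reference (not part of this diff): the chart value changed above can still be overridden at install time if another port is needed. A hedged sketch, with the chart path left as a placeholder:

```shell
# `service.port` is the chart value documented in the table above; <path-to-litellm-chart> is a placeholder
helm install mydeploy <path-to-litellm-chart> \
  --set service.port=4000 \
  --set masterkey=sk-1234
```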
@@ -35,7 +35,7 @@ general_settings:
 ```bash
 litellm --config /path/to/config.yaml

-# RUNNING on http://0.0.0.0:8000
+# RUNNING on http://0.0.0.0:4000
 ```

 ### Test
@@ -44,7 +44,7 @@ litellm --config /path/to/config.yaml
 <TabItem value="curl" label="Curl">

 ```bash
-curl --location 'http://0.0.0.0:8000/embeddings' \
+curl --location 'http://0.0.0.0:4000/embeddings' \
 --header 'Authorization: Bearer sk-1234' \
 --header 'Content-Type: application/json' \
 --data '{"input": ["Academia.edu uses"], "model": "textembedding-gecko", "encoding_format": "base64"}'
@@ -57,7 +57,7 @@ curl --location 'http://0.0.0.0:8000/embeddings' \
 from openai import OpenAI
 client = OpenAI(
     api_key="sk-1234",
-    base_url="http://0.0.0.0:8000"
+    base_url="http://0.0.0.0:4000"
 )

 client.embeddings.create(
@@ -72,7 +72,7 @@ client.embeddings.create(
 ```python
 from langchain_openai import OpenAIEmbeddings

-embeddings = OpenAIEmbeddings(model="textembedding-gecko", openai_api_base="http://0.0.0.0:8000", openai_api_key="sk-1234")
+embeddings = OpenAIEmbeddings(model="textembedding-gecko", openai_api_base="http://0.0.0.0:4000", openai_api_key="sk-1234")

 text = "This is a test document."

@@ -200,7 +200,7 @@ Use this for calling `/embedding` endpoints on OpenAI Compatible Servers, exampl
 from litellm import embedding
 response = embedding(
     model = "openai/<your-llm-name>", # add `openai/` prefix to model so litellm knows to route to OpenAI
-    api_base="http://0.0.0.0:8000/" # set API Base of your Custom OpenAI Endpoint
+    api_base="http://0.0.0.0:4000/" # set API Base of your Custom OpenAI Endpoint
     input=["good morning from litellm"]
 )
 ```
@@ -375,13 +375,13 @@ pip install 'litellm[proxy]'
 ```shell
 $ litellm --model huggingface/bigcode/starcoder

-#INFO: Proxy running on http://0.0.0.0:8000
+#INFO: Proxy running on http://0.0.0.0:4000
 ```

 #### Step 2: Make ChatCompletions Request to Proxy
 ```python
 import openai # openai v1.0.0+
-client = openai.OpenAI(api_key="anything",base_url="http://0.0.0.0:8000") # set proxy to base_url
+client = openai.OpenAI(api_key="anything",base_url="http://0.0.0.0:4000") # set proxy to base_url
 # request sent to model set on litellm proxy, `litellm --model`
 response = client.chat.completions.create(model="gpt-3.5-turbo", messages = [
 {
@@ -90,7 +90,7 @@ import time, asyncio, litellm
 #### LITELLM PROXY ####
 litellm_client = AsyncOpenAI(
     api_key="sk-1234", # [CHANGE THIS]
-    base_url="http://0.0.0.0:8000"
+    base_url="http://0.0.0.0:4000"
 )

 #### AZURE OPENAI CLIENT ####
@@ -63,7 +63,7 @@ export ANTHROPIC_API_KEY="your-api-key"
 ```bash
 $ litellm --model claude-3-opus-20240229

-# Server running on http://0.0.0.0:8000
+# Server running on http://0.0.0.0:4000
 ```

 ### 3. Test it
@@ -73,7 +73,7 @@ $ litellm --model claude-3-opus-20240229
 <TabItem value="Curl" label="Curl Request">

 ```shell
-curl --location 'http://0.0.0.0:8000/chat/completions' \
+curl --location 'http://0.0.0.0:4000/chat/completions' \
 --header 'Content-Type: application/json' \
 --data ' {
 "model": "gpt-3.5-turbo",
@@ -93,7 +93,7 @@ curl --location 'http://0.0.0.0:8000/chat/completions' \
 import openai
 client = openai.OpenAI(
     api_key="anything",
-    base_url="http://0.0.0.0:8000"
+    base_url="http://0.0.0.0:4000"
 )

 # request sent to model set on litellm proxy, `litellm --model`
@@ -120,7 +120,7 @@ from langchain.prompts.chat import (
 from langchain.schema import HumanMessage, SystemMessage

 chat = ChatOpenAI(
-    openai_api_base="http://0.0.0.0:8000", # set openai_api_base to the LiteLLM Proxy
+    openai_api_base="http://0.0.0.0:4000", # set openai_api_base to the LiteLLM Proxy
     model = "gpt-3.5-turbo",
     temperature=0.1
 )
@@ -54,7 +54,7 @@ export AWS_REGION_NAME=""
 ```bash
 $ litellm --model anthropic.claude-3-sonnet-20240229-v1:0

-# Server running on http://0.0.0.0:8000
+# Server running on http://0.0.0.0:4000
 ```

 ### 3. Test it
@@ -64,7 +64,7 @@ $ litellm --model anthropic.claude-3-sonnet-20240229-v1:0
 <TabItem value="Curl" label="Curl Request">

 ```shell
-curl --location 'http://0.0.0.0:8000/chat/completions' \
+curl --location 'http://0.0.0.0:4000/chat/completions' \
 --header 'Content-Type: application/json' \
 --data ' {
 "model": "gpt-3.5-turbo",
@@ -84,7 +84,7 @@ curl --location 'http://0.0.0.0:8000/chat/completions' \
 import openai
 client = openai.OpenAI(
     api_key="anything",
-    base_url="http://0.0.0.0:8000"
+    base_url="http://0.0.0.0:4000"
 )

 # request sent to model set on litellm proxy, `litellm --model`
@@ -111,7 +111,7 @@ from langchain.prompts.chat import (
 from langchain.schema import HumanMessage, SystemMessage

 chat = ChatOpenAI(
-    openai_api_base="http://0.0.0.0:8000", # set openai_api_base to the LiteLLM Proxy
+    openai_api_base="http://0.0.0.0:4000", # set openai_api_base to the LiteLLM Proxy
     model = "gpt-3.5-turbo",
     temperature=0.1
 )
@@ -183,7 +183,7 @@ On the docker container run the `test.py` file using `python3 test.py`
 ```python
 import openai

-api_base = f"http://0.0.0.0:8000" # base url for server
+api_base = f"http://0.0.0.0:4000" # base url for server

 openai.api_base = api_base
 openai.api_key = "temp-key"
@@ -15,7 +15,7 @@ import os
 response = litellm.completion(
     model="openai/mistral", # add `openai/` prefix to model so litellm knows to route to OpenAI
     api_key="sk-1234", # api key to your openai compatible endpoint
-    api_base="http://0.0.0.0:8000", # set API Base of your Custom OpenAI Endpoint
+    api_base="http://0.0.0.0:4000", # set API Base of your Custom OpenAI Endpoint
     messages=[
         {
             "role": "user",
@@ -35,7 +35,7 @@ import os
 response = litellm.embedding(
     model="openai/GPT-J", # add `openai/` prefix to model so litellm knows to route to OpenAI
     api_key="sk-1234", # api key to your openai compatible endpoint
-    api_base="http://0.0.0.0:8000", # set API Base of your Custom OpenAI Endpoint
+    api_base="http://0.0.0.0:4000", # set API Base of your Custom OpenAI Endpoint
     input=["good morning from litellm"]
 )
 print(response)
@@ -145,7 +145,7 @@ $ litellm --config /path/to/config.yaml

 Send the same request twice:
 ```shell
-curl http://0.0.0.0:8000/v1/chat/completions \
+curl http://0.0.0.0:4000/v1/chat/completions \
 -H "Content-Type: application/json" \
 -d '{
 "model": "gpt-3.5-turbo",
@@ -153,7 +153,7 @@ curl http://0.0.0.0:8000/v1/chat/completions \
 "temperature": 0.7
 }'

-curl http://0.0.0.0:8000/v1/chat/completions \
+curl http://0.0.0.0:4000/v1/chat/completions \
 -H "Content-Type: application/json" \
 -d '{
 "model": "gpt-3.5-turbo",
@@ -166,14 +166,14 @@ curl http://0.0.0.0:8000/v1/chat/completions \

 Send the same request twice:
 ```shell
-curl --location 'http://0.0.0.0:8000/embeddings' \
+curl --location 'http://0.0.0.0:4000/embeddings' \
 --header 'Content-Type: application/json' \
 --data ' {
 "model": "text-embedding-ada-002",
 "input": ["write a litellm poem"]
 }'

-curl --location 'http://0.0.0.0:8000/embeddings' \
+curl --location 'http://0.0.0.0:4000/embeddings' \
 --header 'Content-Type: application/json' \
 --data ' {
 "model": "text-embedding-ada-002",
@@ -227,7 +227,7 @@ from openai import OpenAI
 client = OpenAI(
     # This is the default and can be omitted
     api_key=os.environ.get("OPENAI_API_KEY"),
-    base_url="http://0.0.0.0:8000"
+    base_url="http://0.0.0.0:4000"
 )

 chat_completion = client.chat.completions.create(
@@ -255,7 +255,7 @@ from openai import OpenAI
 client = OpenAI(
     # This is the default and can be omitted
     api_key=os.environ.get("OPENAI_API_KEY"),
-    base_url="http://0.0.0.0:8000"
+    base_url="http://0.0.0.0:4000"
 )

 chat_completion = client.chat.completions.create(
@@ -281,7 +281,7 @@ from openai import OpenAI
 client = OpenAI(
     # This is the default and can be omitted
     api_key=os.environ.get("OPENAI_API_KEY"),
-    base_url="http://0.0.0.0:8000"
+    base_url="http://0.0.0.0:4000"
 )

 chat_completion = client.chat.completions.create(
@@ -63,7 +63,7 @@ litellm_settings:
 $ litellm /path/to/config.yaml
 ```
 ```shell
-curl --location 'http://0.0.0.0:8000/chat/completions' \
+curl --location 'http://0.0.0.0:4000/chat/completions' \
 --data ' {
 "model": "gpt-3.5-turbo",
 "messages": [
@@ -162,7 +162,7 @@ litellm_settings:
 $ litellm /path/to/config.yaml
 ```
 ```shell
-curl --location 'http://0.0.0.0:8000/chat/completions' \
+curl --location 'http://0.0.0.0:4000/chat/completions' \
 --data ' {
 "model": "gpt-3.5-turbo",
 "messages": [
@@ -15,7 +15,7 @@ Cli arguments, --host, --port, --num_workers
 ```

 ## --port
-- **Default:** `8000`
+- **Default:** `4000`
 - The port to bind the server to.
 - **Usage:**
   ```shell
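For reference (not part of this diff): with the default now `4000`, the `--port` flag documented in this section still pins the proxy to any port you want, e.g.:

```shell
# explicitly bind the proxy to the new default port (pass 8000 instead to keep the old behaviour)
litellm --model gpt-3.5-turbo --port 4000
```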
@@ -13,7 +13,7 @@ Set model list, `api_base`, `api_key`, `temperature` & proxy server settings (`m
 | `general_settings` | Server settings, example setting `master_key: sk-my_special_key` |
 | `environment_variables` | Environment Variables example, `REDIS_HOST`, `REDIS_PORT` |

-**Complete List:** Check the Swagger UI docs on `<your-proxy-url>/#/config.yaml` (e.g. http://0.0.0.0:8000/#/config.yaml), for everything you can pass in the config.yaml.
+**Complete List:** Check the Swagger UI docs on `<your-proxy-url>/#/config.yaml` (e.g. http://0.0.0.0:4000/#/config.yaml), for everything you can pass in the config.yaml.


 ## Quick Start
@@ -55,7 +55,7 @@ model_list:
   - model_name: vllm-models
     litellm_params:
       model: openai/facebook/opt-125m # the `openai/` prefix tells litellm it's openai compatible
-      api_base: http://0.0.0.0:8000
+      api_base: http://0.0.0.0:4000
       rpm: 1440
     model_info:
       version: 2
@@ -91,7 +91,7 @@ Sends request to model where `model_name=gpt-3.5-turbo` on config.yaml.
 If multiple with `model_name=gpt-3.5-turbo` does [Load Balancing](https://docs.litellm.ai/docs/proxy/load_balancing)

 ```shell
-curl --location 'http://0.0.0.0:8000/chat/completions' \
+curl --location 'http://0.0.0.0:4000/chat/completions' \
 --header 'Content-Type: application/json' \
 --data ' {
 "model": "gpt-3.5-turbo",
@@ -111,7 +111,7 @@ curl --location 'http://0.0.0.0:8000/chat/completions' \
 Sends this request to model where `model_name=bedrock-claude-v1` on config.yaml

 ```shell
-curl --location 'http://0.0.0.0:8000/chat/completions' \
+curl --location 'http://0.0.0.0:4000/chat/completions' \
 --header 'Content-Type: application/json' \
 --data ' {
 "model": "bedrock-claude-v1",
@@ -131,7 +131,7 @@ curl --location 'http://0.0.0.0:8000/chat/completions' \
 import openai
 client = openai.OpenAI(
     api_key="anything",
-    base_url="http://0.0.0.0:8000"
+    base_url="http://0.0.0.0:4000"
 )

 # Sends request to model where `model_name=gpt-3.5-turbo` on config.yaml.
@@ -179,7 +179,7 @@ messages = [

 # Sends request to model where `model_name=gpt-3.5-turbo` on config.yaml.
 chat = ChatOpenAI(
-    openai_api_base="http://0.0.0.0:8000", # set openai base to the proxy
+    openai_api_base="http://0.0.0.0:4000", # set openai base to the proxy
     model = "gpt-3.5-turbo",
     temperature=0.1
 )
@@ -189,7 +189,7 @@ print(response)

 # Sends request to model where `model_name=bedrock-claude-v1` on config.yaml.
 claude_chat = ChatOpenAI(
-    openai_api_base="http://0.0.0.0:8000", # set openai base to the proxy
+    openai_api_base="http://0.0.0.0:4000", # set openai base to the proxy
     model = "bedrock-claude-v1",
     temperature=0.1
 )
@@ -560,7 +560,7 @@ litellm --config config.yaml
 Sends Request to `bedrock-cohere`

 ```shell
-curl --location 'http://0.0.0.0:8000/chat/completions' \
+curl --location 'http://0.0.0.0:4000/chat/completions' \
 --header 'Content-Type: application/json' \
 --data ' {
 "model": "bedrock-cohere",
@@ -254,10 +254,10 @@ helm install \
 kubectl \
   port-forward \
   service/mydeploy-litellm \
-  8000:8000
+  4000:4000
 ```

-Your OpenAI proxy server is now running on `http://127.0.0.1:8000`.
+Your OpenAI proxy server is now running on `http://127.0.0.1:4000`.

 </TabItem>
 </Tabs>
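For reference (not part of this diff): a quick way to confirm the port-forward above is working, assuming the master key used elsewhere in these docs:

```shell
# should return a health payload from the proxy through the forwarded local port
curl http://127.0.0.1:4000/health -H "Authorization: Bearer sk-1234"
```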
@@ -473,11 +473,11 @@ services:
         target: runtime
     image: ghcr.io/berriai/litellm:main-latest
     ports:
-      - "8000:8000" # Map the container port to the host, change the host port if necessary
+      - "4000:4000" # Map the container port to the host, change the host port if necessary
     volumes:
       - ./litellm-config.yaml:/app/config.yaml # Mount the local configuration file
     # You can change the port or number of workers as per your requirements or pass any new supported CLI argument. Make sure the port passed here matches with the container port defined above in `ports` value
-    command: [ "--config", "/app/config.yaml", "--port", "8000", "--num_workers", "8" ]
+    command: [ "--config", "/app/config.yaml", "--port", "4000", "--num_workers", "8" ]

 # ...rest of your docker-compose config if any
 ```
@@ -495,4 +495,4 @@ Run the command `docker-compose up` or `docker compose up` as per your docker in
 > Use `-d` flag to run the container in detached mode (background) e.g. `docker compose up -d`


-Your LiteLLM container should be running now on the defined port e.g. `8000`.
+Your LiteLLM container should be running now on the defined port e.g. `4000`.
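For reference (not part of this diff): the host-side port in the mapping is free to differ from the container port, as long as the container port matches `--port`. A minimal sketch without compose, reusing the same image and arguments shown above:

```shell
# expose the proxy on host port 8000 while the container listens on the new default 4000
docker run -v $(pwd)/litellm-config.yaml:/app/config.yaml -p 8000:4000 \
  ghcr.io/berriai/litellm:main-latest --config /app/config.yaml --port 4000
```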
@@ -38,7 +38,7 @@ $ litellm --config /path/to/config.yaml
 3. Test the embedding call

 ```shell
-curl --location 'http://0.0.0.0:8000/v1/embeddings' \
+curl --location 'http://0.0.0.0:4000/v1/embeddings' \
 --header 'Authorization: Bearer sk-1234' \
 --header 'Content-Type: application/json' \
 --data '{
@@ -58,7 +58,7 @@ callbacks: ["llamaguard_moderations"]
 Set the LLM Guard API Base in your environment

 ```env
-LLM_GUARD_API_BASE = "http://0.0.0.0:8000"
+LLM_GUARD_API_BASE = "http://0.0.0.0:4000"
 ```

 Add `llmguard_moderations` as a callback
@@ -143,7 +143,7 @@ When `no-log=True`, the request will **not be logged on any callbacks** and ther
 import openai
 client = openai.OpenAI(
     api_key="anything", # proxy api-key
-    base_url="http://0.0.0.0:8000" # litellm proxy
+    base_url="http://0.0.0.0:4000" # litellm proxy
 )

 response = client.chat.completions.create(
@@ -175,7 +175,7 @@ litellm_settings:
 ### How to test

 ```bash
-curl --location 'http://0.0.0.0:8000/chat/completions' \
+curl --location 'http://0.0.0.0:4000/chat/completions' \
 --header 'Content-Type: application/json' \
 --data ' {
 "model": "gpt-3.5-turbo",
@@ -202,7 +202,7 @@ curl --location 'http://0.0.0.0:8000/chat/completions' \
 **Block all calls for a user id**

 ```
-curl -X POST "http://0.0.0.0:8000/user/block" \
+curl -X POST "http://0.0.0.0:4000/user/block" \
 -H "Authorization: Bearer sk-1234" \
 -D '{
 "user_ids": [<user_id>, ...]
@@ -212,7 +212,7 @@ curl -X POST "http://0.0.0.0:8000/user/block" \
 **Unblock calls for a user id**

 ```
-curl -X POST "http://0.0.0.0:8000/user/unblock" \
+curl -X POST "http://0.0.0.0:4000/user/unblock" \
 -H "Authorization: Bearer sk-1234" \
 -D '{
 "user_ids": [<user_id>, ...]
@@ -230,7 +230,7 @@ litellm_settings:
 ### Test this

 ```bash
-curl --location 'http://0.0.0.0:8000/chat/completions' \
+curl --location 'http://0.0.0.0:4000/chat/completions' \
 --header 'Content-Type: application/json' \
 --data ' {
 "model": "gpt-3.5-turbo",
@@ -263,7 +263,7 @@ Set `extra_body={"metadata": { }}` to `metadata` you want to pass
 import openai
 client = openai.OpenAI(
     api_key="anything",
-    base_url="http://0.0.0.0:8000"
+    base_url="http://0.0.0.0:4000"
 )

 # request sent to model set on litellm proxy, `litellm --model`
@@ -291,7 +291,7 @@ print(response)
 Pass `metadata` as part of the request body

 ```shell
-curl --location 'http://0.0.0.0:8000/chat/completions' \
+curl --location 'http://0.0.0.0:4000/chat/completions' \
 --header 'Content-Type: application/json' \
 --data '{
 "model": "gpt-3.5-turbo",
@@ -317,7 +317,7 @@ from langchain.prompts.chat import (
 from langchain.schema import HumanMessage, SystemMessage

 chat = ChatOpenAI(
-    openai_api_base="http://0.0.0.0:8000",
+    openai_api_base="http://0.0.0.0:4000",
     model = "gpt-3.5-turbo",
     temperature=0.1,
     extra_body={
@@ -12,10 +12,10 @@ The proxy exposes:
 #### Request
 Make a GET Request to `/health` on the proxy
 ```shell
-curl --location 'http://0.0.0.0:8000/health' -H "Authorization: Bearer sk-1234"
+curl --location 'http://0.0.0.0:4000/health' -H "Authorization: Bearer sk-1234"
 ```

-You can also run `litellm -health` it makes a `get` request to `http://0.0.0.0:8000/health` for you
+You can also run `litellm --health`; it makes a `get` request to `http://0.0.0.0:4000/health` for you
 ```
 litellm --health
 ```
@@ -60,7 +60,7 @@ $ litellm /path/to/config.yaml

 3. Query health endpoint:
 ```
-curl --location 'http://0.0.0.0:8000/health'
+curl --location 'http://0.0.0.0:4000/health'
 ```

 ### Embedding Models
@@ -119,7 +119,7 @@ Unprotected endpoint for checking if proxy is ready to accept requests
 Example Request:

 ```bash
-curl --location 'http://0.0.0.0:8000/health/readiness'
+curl --location 'http://0.0.0.0:4000/health/readiness'
 ```

 Example Response:
@@ -153,7 +153,7 @@ Example Request:

 ```
 curl -X 'GET' \
-'http://0.0.0.0:8000/health/liveliness' \
+'http://0.0.0.0:4000/health/liveliness' \
 -H 'accept: application/json'
 ```

@@ -45,7 +45,7 @@ $ litellm --config /path/to/config.yaml
 ### Step 3: Use proxy - Call a model group [Load Balancing]
 Curl Command
 ```shell
-curl --location 'http://0.0.0.0:8000/chat/completions' \
+curl --location 'http://0.0.0.0:4000/chat/completions' \
 --header 'Content-Type: application/json' \
 --data ' {
 "model": "gpt-3.5-turbo",
@@ -65,7 +65,7 @@ If you want to call a specific model defined in the `config.yaml`, you can call
 In this example it will call `azure/gpt-turbo-small-ca`. Defined in the config on Step 1

 ```bash
-curl --location 'http://0.0.0.0:8000/chat/completions' \
+curl --location 'http://0.0.0.0:4000/chat/completions' \
 --header 'Content-Type: application/json' \
 --data ' {
 "model": "azure/gpt-turbo-small-ca",
@@ -150,7 +150,7 @@ litellm --config proxy_config.yaml
 ```

 ```shell
-curl --location 'http://0.0.0.0:8000/chat/completions' \
+curl --location 'http://0.0.0.0:4000/chat/completions' \
 --header 'Authorization: Bearer sk-1234' \
 --data ' {
 "model": "gpt-3.5-turbo",
@@ -174,7 +174,7 @@ On Success
 Usage: {'completion_tokens': 10, 'prompt_tokens': 11, 'total_tokens': 21},
 Cost: 3.65e-05,
 Response: {'id': 'chatcmpl-8S8avKJ1aVBg941y5xzGMSKrYCMvN', 'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'content': 'Good morning! How can I assist you today?', 'role': 'assistant'}}], 'created': 1701716913, 'model': 'gpt-3.5-turbo-0613', 'object': 'chat.completion', 'system_fingerprint': None, 'usage': {'completion_tokens': 10, 'prompt_tokens': 11, 'total_tokens': 21}}
-Proxy Metadata: {'user_api_key': None, 'headers': Headers({'host': '0.0.0.0:8000', 'user-agent': 'curl/7.88.1', 'accept': '*/*', 'authorization': 'Bearer sk-1234', 'content-length': '199', 'content-type': 'application/x-www-form-urlencoded'}), 'model_group': 'gpt-3.5-turbo', 'deployment': 'gpt-3.5-turbo-ModelID-gpt-3.5-turbo'}
+Proxy Metadata: {'user_api_key': None, 'headers': Headers({'host': '0.0.0.0:4000', 'user-agent': 'curl/7.88.1', 'accept': '*/*', 'authorization': 'Bearer sk-1234', 'content-length': '199', 'content-type': 'application/x-www-form-urlencoded'}), 'model_group': 'gpt-3.5-turbo', 'deployment': 'gpt-3.5-turbo-ModelID-gpt-3.5-turbo'}
 ```

 #### Logging Proxy Request Object, Header, Url
@@ -374,7 +374,7 @@ async def log_event(request: Request):

 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run(app, host="127.0.0.1", port=8000)
+    uvicorn.run(app, host="127.0.0.1", port=4000)


 ```
@@ -383,7 +383,7 @@ if __name__ == "__main__":
 #### Step 2. Set your `GENERIC_LOGGER_ENDPOINT` to the endpoint + route we should send callback logs to

 ```shell
-os.environ["GENERIC_LOGGER_ENDPOINT"] = "http://localhost:8000/log-event"
+os.environ["GENERIC_LOGGER_ENDPOINT"] = "http://localhost:4000/log-event"
 ```

 #### Step 3. Create a `config.yaml` file and set `litellm_settings`: `success_callback` = ["generic"]
@@ -445,7 +445,7 @@ Expected output on Langfuse
 Pass `metadata` as part of the request body

 ```shell
-curl --location 'http://0.0.0.0:8000/chat/completions' \
+curl --location 'http://0.0.0.0:4000/chat/completions' \
 --header 'Content-Type: application/json' \
 --data '{
 "model": "gpt-3.5-turbo",
@@ -472,7 +472,7 @@ Set `extra_body={"metadata": { }}` to `metadata` you want to pass
 import openai
 client = openai.OpenAI(
     api_key="anything",
-    base_url="http://0.0.0.0:8000"
+    base_url="http://0.0.0.0:4000"
 )

 # request sent to model set on litellm proxy, `litellm --model`
@@ -509,7 +509,7 @@ from langchain.prompts.chat import (
 from langchain.schema import HumanMessage, SystemMessage

 chat = ChatOpenAI(
-    openai_api_base="http://0.0.0.0:8000",
+    openai_api_base="http://0.0.0.0:4000",
     model = "gpt-3.5-turbo",
     temperature=0.1,
     extra_body={
@@ -663,7 +663,7 @@ litellm --config config.yaml --debug

 Test Request
 ```shell
-curl --location 'http://0.0.0.0:8000/chat/completions' \
+curl --location 'http://0.0.0.0:4000/chat/completions' \
 --header 'Content-Type: application/json' \
 --data ' {
 "model": "Azure OpenAI GPT-4 East",
@@ -698,7 +698,7 @@ litellm_settings:
 Now, when you [generate keys](./virtual_keys.md) for this team-id

 ```bash
-curl -X POST 'http://0.0.0.0:8000/key/generate' \
+curl -X POST 'http://0.0.0.0:4000/key/generate' \
 -H 'Authorization: Bearer sk-1234' \
 -H 'Content-Type: application/json' \
 -D '{"team_id": "ishaans-secret-project"}'
@@ -742,7 +742,7 @@ litellm --config config.yaml --debug

 Test Request
 ```shell
-curl --location 'http://0.0.0.0:8000/chat/completions' \
+curl --location 'http://0.0.0.0:4000/chat/completions' \
 --header 'Content-Type: application/json' \
 --data ' {
 "model": "Azure OpenAI GPT-4 East",
@@ -903,7 +903,7 @@ litellm --config config.yaml --debug

 Test Request
 ```
-curl --location 'http://0.0.0.0:8000/chat/completions' \
+curl --location 'http://0.0.0.0:4000/chat/completions' \
 --header 'Content-Type: application/json' \
 --data ' {
 "model": "gpt-3.5-turbo",
@@ -947,7 +947,7 @@ litellm --config config.yaml --debug

 Test Request
 ```
-curl --location 'http://0.0.0.0:8000/chat/completions' \
+curl --location 'http://0.0.0.0:4000/chat/completions' \
 --header 'Content-Type: application/json' \
 --data ' {
 "model": "gpt-3.5-turbo",
@@ -24,7 +24,7 @@ Retrieve detailed information about each model listed in the `/models` endpoint,
 <TabItem value="curl">

 ```bash
-curl -X GET "http://0.0.0.0:8000/model/info" \
+curl -X GET "http://0.0.0.0:4000/model/info" \
 -H "accept: application/json" \
 ```
 </TabItem>
@@ -42,7 +42,7 @@ Add a new model to the list in the `config.yaml` by providing the model paramete
 <TabItem value="curl">

 ```bash
-curl -X POST "http://0.0.0.0:8000/model/new" \
+curl -X POST "http://0.0.0.0:4000/model/new" \
 -H "accept: application/json" \
 -H "Content-Type: application/json" \
 -d '{ "model_name": "azure-gpt-turbo", "litellm_params": {"model": "azure/gpt-3.5-turbo", "api_key": "os.environ/AZURE_API_KEY", "api_base": "my-azure-api-base"} }'
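For reference (not part of this diff): after a `/model/new` call like the one above, the `/model/info` endpoint from the previous hunk can confirm the deployment was registered. A quick sketch, assuming the proxy's master key is required for this endpoint:

```shell
# list registered models; the newly added "azure-gpt-turbo" entry should appear in the response
curl -X GET "http://0.0.0.0:4000/model/info" \
  -H "accept: application/json" \
  -H "Authorization: Bearer sk-1234"
```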
|
|
|
@ -96,7 +96,7 @@ Turn off PII masking for a given key.
|
||||||
Do this by setting `permissions: {"pii": false}`, when generating a key.
|
Do this by setting `permissions: {"pii": false}`, when generating a key.
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
curl --location 'http://0.0.0.0:8000/key/generate' \
|
curl --location 'http://0.0.0.0:4000/key/generate' \
|
||||||
--header 'Authorization: Bearer sk-1234' \
|
--header 'Authorization: Bearer sk-1234' \
|
||||||
--header 'Content-Type: application/json' \
|
--header 'Content-Type: application/json' \
|
||||||
--data '{
|
--data '{
|
||||||
|
@ -119,7 +119,7 @@ The proxy support 2 request-level PII controls:
|
||||||
Set `allow_pii_controls` to true for a given key. This will allow the user to set request-level PII controls.
|
Set `allow_pii_controls` to true for a given key. This will allow the user to set request-level PII controls.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
curl --location 'http://0.0.0.0:8000/key/generate' \
|
curl --location 'http://0.0.0.0:4000/key/generate' \
|
||||||
--header 'Authorization: Bearer my-master-key' \
|
--header 'Authorization: Bearer my-master-key' \
|
||||||
--header 'Content-Type: application/json' \
|
--header 'Content-Type: application/json' \
|
||||||
--data '{
|
--data '{
|
||||||
|
@ -136,7 +136,7 @@ from openai import OpenAI
|
||||||
client = OpenAI(
|
client = OpenAI(
|
||||||
# This is the default and can be omitted
|
# This is the default and can be omitted
|
||||||
api_key=os.environ.get("OPENAI_API_KEY"),
|
api_key=os.environ.get("OPENAI_API_KEY"),
|
||||||
base_url="http://0.0.0.0:8000"
|
base_url="http://0.0.0.0:4000"
|
||||||
)
|
)
|
||||||
|
|
||||||
chat_completion = client.chat.completions.create(
|
chat_completion = client.chat.completions.create(
|
||||||
|
|
|
@ -21,7 +21,7 @@ Run the following command to start the litellm proxy
|
||||||
```shell
|
```shell
|
||||||
$ litellm --model huggingface/bigcode/starcoder
|
$ litellm --model huggingface/bigcode/starcoder
|
||||||
|
|
||||||
#INFO: Proxy running on http://0.0.0.0:8000
|
#INFO: Proxy running on http://0.0.0.0:4000
|
||||||
```
|
```
|
||||||
|
|
||||||
### Test
|
### Test
|
||||||
|
@ -250,7 +250,7 @@ litellm --config your_config.yaml
|
||||||
<TabItem value="Curl" label="Curl Request">
|
<TabItem value="Curl" label="Curl Request">
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
curl --location 'http://0.0.0.0:8000/chat/completions' \
|
curl --location 'http://0.0.0.0:4000/chat/completions' \
|
||||||
--header 'Content-Type: application/json' \
|
--header 'Content-Type: application/json' \
|
||||||
--data ' {
|
--data ' {
|
||||||
"model": "gpt-3.5-turbo",
|
"model": "gpt-3.5-turbo",
|
||||||
|
@ -270,7 +270,7 @@ curl --location 'http://0.0.0.0:8000/chat/completions' \
|
||||||
import openai
|
import openai
|
||||||
client = openai.OpenAI(
|
client = openai.OpenAI(
|
||||||
api_key="anything",
|
api_key="anything",
|
||||||
base_url="http://0.0.0.0:8000"
|
base_url="http://0.0.0.0:4000"
|
||||||
)
|
)
|
||||||
|
|
||||||
# request sent to model set on litellm proxy, `litellm --model`
|
# request sent to model set on litellm proxy, `litellm --model`
|
||||||
|
@ -297,7 +297,7 @@ from langchain.prompts.chat import (
|
||||||
from langchain.schema import HumanMessage, SystemMessage
|
from langchain.schema import HumanMessage, SystemMessage
|
||||||
|
|
||||||
chat = ChatOpenAI(
|
chat = ChatOpenAI(
|
||||||
openai_api_base="http://0.0.0.0:8000", # set openai_api_base to the LiteLLM Proxy
|
openai_api_base="http://0.0.0.0:4000", # set openai_api_base to the LiteLLM Proxy
|
||||||
model = "gpt-3.5-turbo",
|
model = "gpt-3.5-turbo",
|
||||||
temperature=0.1
|
temperature=0.1
|
||||||
)
|
)
|
||||||
|
@ -321,7 +321,7 @@ print(response)
|
||||||
```python
|
```python
|
||||||
from langchain.embeddings import OpenAIEmbeddings
|
from langchain.embeddings import OpenAIEmbeddings
|
||||||
|
|
||||||
embeddings = OpenAIEmbeddings(model="sagemaker-embeddings", openai_api_base="http://0.0.0.0:8000", openai_api_key="temp-key")
|
embeddings = OpenAIEmbeddings(model="sagemaker-embeddings", openai_api_base="http://0.0.0.0:4000", openai_api_key="temp-key")
|
||||||
|
|
||||||
|
|
||||||
text = "This is a test document."
|
text = "This is a test document."
|
||||||
|
@ -331,7 +331,7 @@ query_result = embeddings.embed_query(text)
|
||||||
print(f"SAGEMAKER EMBEDDINGS")
|
print(f"SAGEMAKER EMBEDDINGS")
|
||||||
print(query_result[:5])
|
print(query_result[:5])
|
||||||
|
|
||||||
embeddings = OpenAIEmbeddings(model="bedrock-embeddings", openai_api_base="http://0.0.0.0:8000", openai_api_key="temp-key")
|
embeddings = OpenAIEmbeddings(model="bedrock-embeddings", openai_api_base="http://0.0.0.0:4000", openai_api_key="temp-key")
|
||||||
|
|
||||||
text = "This is a test document."
|
text = "This is a test document."
|
||||||
|
|
||||||
|
@ -340,7 +340,7 @@ query_result = embeddings.embed_query(text)
|
||||||
print(f"BEDROCK EMBEDDINGS")
|
print(f"BEDROCK EMBEDDINGS")
|
||||||
print(query_result[:5])
|
print(query_result[:5])
|
||||||
|
|
||||||
embeddings = OpenAIEmbeddings(model="bedrock-titan-embeddings", openai_api_base="http://0.0.0.0:8000", openai_api_key="temp-key")
|
embeddings = OpenAIEmbeddings(model="bedrock-titan-embeddings", openai_api_base="http://0.0.0.0:4000", openai_api_key="temp-key")
|
||||||
|
|
||||||
text = "This is a test document."
|
text = "This is a test document."
|
||||||
|
|
||||||
|
@ -407,11 +407,11 @@ services:
|
||||||
litellm:
|
litellm:
|
||||||
image: ghcr.io/berriai/litellm:main
|
image: ghcr.io/berriai/litellm:main
|
||||||
ports:
|
ports:
|
||||||
- "8000:8000" # Map the container port to the host, change the host port if necessary
|
- "4000:4000" # Map the container port to the host, change the host port if necessary
|
||||||
volumes:
|
volumes:
|
||||||
- ./litellm-config.yaml:/app/config.yaml # Mount the local configuration file
|
- ./litellm-config.yaml:/app/config.yaml # Mount the local configuration file
|
||||||
# You can change the port or number of workers as per your requirements or pass any new supported CLI augument. Make sure the port passed here matches with the container port defined above in `ports` value
|
# You can change the port or number of workers as per your requirements or pass any new supported CLI augument. Make sure the port passed here matches with the container port defined above in `ports` value
|
||||||
command: [ "--config", "/app/config.yaml", "--port", "8000", "--num_workers", "8" ]
|
command: [ "--config", "/app/config.yaml", "--port", "4000", "--num_workers", "8" ]
|
||||||
|
|
||||||
# ...rest of your docker-compose config if any
|
# ...rest of your docker-compose config if any
|
||||||
```
|
```
|
||||||
|
@ -429,7 +429,7 @@ Run the command `docker-compose up` or `docker compose up` as per your docker in
|
||||||
> Use `-d` flag to run the container in detached mode (background) e.g. `docker compose up -d`
|
> Use `-d` flag to run the container in detached mode (background) e.g. `docker compose up -d`
|
||||||
|
|
||||||
|
|
||||||
Your LiteLLM container should be running now on the defined port e.g. `8000`.
|
Your LiteLLM container should be running now on the defined port e.g. `4000`.
|
||||||
|
|
||||||
|
|
||||||
## Using with OpenAI compatible projects
|
## Using with OpenAI compatible projects
|
||||||
|
@ -442,7 +442,7 @@ Set `base_url` to the LiteLLM Proxy server
|
||||||
import openai
|
import openai
|
||||||
client = openai.OpenAI(
|
client = openai.OpenAI(
|
||||||
api_key="anything",
|
api_key="anything",
|
||||||
base_url="http://0.0.0.0:8000"
|
base_url="http://0.0.0.0:4000"
|
||||||
)
|
)
|
||||||
|
|
||||||
# request sent to model set on litellm proxy, `litellm --model`
|
# request sent to model set on litellm proxy, `litellm --model`
|
||||||
|
@ -463,7 +463,7 @@ print(response)
|
||||||
```shell
|
```shell
|
||||||
litellm --model gpt-3.5-turbo
|
litellm --model gpt-3.5-turbo
|
||||||
|
|
||||||
#INFO: Proxy running on http://0.0.0.0:8000
|
#INFO: Proxy running on http://0.0.0.0:4000
|
||||||
```
|
```
|
||||||
|
|
||||||
#### 1. Clone the repo
|
#### 1. Clone the repo
|
||||||
|
@ -474,9 +474,9 @@ git clone https://github.com/danny-avila/LibreChat.git
|
||||||
|
|
||||||
|
|
||||||
#### 2. Modify Librechat's `docker-compose.yml`
|
#### 2. Modify Librechat's `docker-compose.yml`
|
||||||
LiteLLM Proxy is running on port `8000`, set `8000` as the proxy below
|
LiteLLM Proxy is running on port `4000`, set `4000` as the proxy below
|
||||||
```yaml
|
```yaml
|
||||||
OPENAI_REVERSE_PROXY=http://host.docker.internal:8000/v1/chat/completions
|
OPENAI_REVERSE_PROXY=http://host.docker.internal:4000/v1/chat/completions
|
||||||
```
|
```
|
||||||
|
|
||||||
#### 3. Save fake OpenAI key in Librechat's `.env`
|
#### 3. Save fake OpenAI key in Librechat's `.env`
|
||||||
|
@ -502,7 +502,7 @@ In the [config.py](https://continue.dev/docs/reference/Models/openai) set this a
|
||||||
api_key="IGNORED",
|
api_key="IGNORED",
|
||||||
model="fake-model-name",
|
model="fake-model-name",
|
||||||
context_length=2048, # customize if needed for your model
|
context_length=2048, # customize if needed for your model
|
||||||
api_base="http://localhost:8000" # your proxy server url
|
api_base="http://localhost:4000" # your proxy server url
|
||||||
),
|
),
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -514,7 +514,7 @@ Credits [@vividfog](https://github.com/jmorganca/ollama/issues/305#issuecomment-
|
||||||
```shell
|
```shell
|
||||||
$ pip install aider
|
$ pip install aider
|
||||||
|
|
||||||
$ aider --openai-api-base http://0.0.0.0:8000 --openai-api-key fake-key
|
$ aider --openai-api-base http://0.0.0.0:4000 --openai-api-key fake-key
|
||||||
```
|
```
|
||||||
</TabItem>
|
</TabItem>
|
||||||
<TabItem value="autogen" label="AutoGen">
|
<TabItem value="autogen" label="AutoGen">
|
||||||
|
@ -528,7 +528,7 @@ from autogen import AssistantAgent, UserProxyAgent, oai
|
||||||
config_list=[
|
config_list=[
|
||||||
{
|
{
|
||||||
"model": "my-fake-model",
|
"model": "my-fake-model",
|
||||||
"api_base": "http://localhost:8000", #litellm compatible endpoint
|
"api_base": "http://localhost:4000", #litellm compatible endpoint
|
||||||
"api_type": "open_ai",
|
"api_type": "open_ai",
|
||||||
"api_key": "NULL", # just a placeholder
|
"api_key": "NULL", # just a placeholder
|
||||||
}
|
}
|
||||||
|
@ -566,7 +566,7 @@ import guidance
|
||||||
|
|
||||||
# set api_base to your proxy
|
# set api_base to your proxy
|
||||||
# set api_key to anything
|
# set api_key to anything
|
||||||
gpt4 = guidance.llms.OpenAI("gpt-4", api_base="http://0.0.0.0:8000", api_key="anything")
|
gpt4 = guidance.llms.OpenAI("gpt-4", api_base="http://0.0.0.0:4000", api_key="anything")
|
||||||
|
|
||||||
experts = guidance('''
|
experts = guidance('''
|
||||||
{{#system~}}
|
{{#system~}}
|
||||||
|
|
|
@ -45,7 +45,7 @@ litellm_settings:
|
||||||
**Set dynamically**
|
**Set dynamically**
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
curl --location 'http://0.0.0.0:8000/chat/completions' \
|
curl --location 'http://0.0.0.0:4000/chat/completions' \
|
||||||
--header 'Content-Type: application/json' \
|
--header 'Content-Type: application/json' \
|
||||||
--data ' {
|
--data ' {
|
||||||
"model": "zephyr-beta",
|
"model": "zephyr-beta",
|
||||||
|
@ -101,7 +101,7 @@ LiteLLM Proxy supports setting a `timeout` per request
|
||||||
<TabItem value="Curl" label="Curl Request">
|
<TabItem value="Curl" label="Curl Request">
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
curl --location 'http://0.0.0.0:8000/chat/completions' \
|
curl --location 'http://0.0.0.0:4000/chat/completions' \
|
||||||
--header 'Content-Type: application/json' \
|
--header 'Content-Type: application/json' \
|
||||||
--data-raw '{
|
--data-raw '{
|
||||||
"model": "gpt-3.5-turbo",
|
"model": "gpt-3.5-turbo",
|
@@ -121,7 +121,7 @@ import openai

 client = openai.OpenAI(
 api_key="anything",
-base_url="http://0.0.0.0:8000"
+base_url="http://0.0.0.0:4000"
 )

 response = client.chat.completions.create(

@@ -30,7 +30,7 @@ $ litellm /path/to/config.yaml
 ```

 ```bash
-curl --location 'http://0.0.0.0:8000/v1/chat/completions' \
+curl --location 'http://0.0.0.0:4000/v1/chat/completions' \
 --header 'Content-Type: application/json' \
 --header 'Authorization: Bearer sk-1234' \
 --data '{

@@ -65,7 +65,7 @@ litellm --config proxy_config.yaml
 ```

 ```shell
-curl --location 'http://0.0.0.0:8000/chat/completions' \
+curl --location 'http://0.0.0.0:4000/chat/completions' \
 --header 'Authorization: Bearer sk-1234' \
 --data ' {
 "model": "gpt-3.5-turbo",

@@ -28,12 +28,12 @@ Follow [setup](./virtual_keys.md#setup)
 ```bash
 litellm --config /path/to/config.yaml

-#INFO: Proxy running on http://0.0.0.0:8000
+#INFO: Proxy running on http://0.0.0.0:4000
 ```

 ### 2. Go to UI
 ```bash
-http://0.0.0.0:8000/ui # <proxy_base_url>/ui
+http://0.0.0.0:4000/ui # <proxy_base_url>/ui
 ```


@@ -26,7 +26,7 @@ Set `extra_body={"metadata": { }}` to `metadata` you want to pass
 import openai
 client = openai.OpenAI(
 api_key="anything",
-base_url="http://0.0.0.0:8000"
+base_url="http://0.0.0.0:4000"
 )

 # request sent to model set on litellm proxy, `litellm --model`
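The metadata hunk above cuts off before the request itself. A hedged sketch of a complete call, assuming a proxy at `http://0.0.0.0:4000`; the metadata keys shown are illustrative, not taken from this diff:

```python
# Minimal sketch: forwarding request metadata through a LiteLLM proxy via extra_body
import openai

client = openai.OpenAI(api_key="anything", base_url="http://0.0.0.0:4000")

response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "what llm are you"}],
    extra_body={"metadata": {"generation_name": "test-generation", "project": "proxy-docs"}},  # illustrative keys
)
print(response)
```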
@@ -92,7 +92,7 @@ print(response)
 Pass `metadata` as part of the request body

 ```shell
-curl --location 'http://0.0.0.0:8000/chat/completions' \
+curl --location 'http://0.0.0.0:4000/chat/completions' \
 --header 'Content-Type: application/json' \
 --data '{
 "model": "gpt-3.5-turbo",

@@ -123,7 +123,7 @@ from langchain.prompts.chat import (
 from langchain.schema import HumanMessage, SystemMessage

 chat = ChatOpenAI(
-openai_api_base="http://0.0.0.0:8000",
+openai_api_base="http://0.0.0.0:4000",
 model = "gpt-3.5-turbo",
 temperature=0.1,
 extra_body={

@@ -195,7 +195,7 @@ from openai import OpenAI

 # set base_url to your proxy server
 # set api_key to send to proxy server
-client = OpenAI(api_key="<proxy-api-key>", base_url="http://0.0.0.0:8000")
+client = OpenAI(api_key="<proxy-api-key>", base_url="http://0.0.0.0:4000")

 response = client.embeddings.create(
 input=["hello from litellm"],

@@ -209,7 +209,7 @@ print(response)
 <TabItem value="Curl" label="Curl Request">

 ```shell
-curl --location 'http://0.0.0.0:8000/embeddings' \
+curl --location 'http://0.0.0.0:4000/embeddings' \
 --header 'Content-Type: application/json' \
 --data ' {
 "model": "text-embedding-ada-002",

@@ -223,7 +223,7 @@ curl --location 'http://0.0.0.0:8000/embeddings' \
 ```python
 from langchain.embeddings import OpenAIEmbeddings

-embeddings = OpenAIEmbeddings(model="sagemaker-embeddings", openai_api_base="http://0.0.0.0:8000", openai_api_key="temp-key")
+embeddings = OpenAIEmbeddings(model="sagemaker-embeddings", openai_api_base="http://0.0.0.0:4000", openai_api_key="temp-key")


 text = "This is a test document."

@@ -233,7 +233,7 @@ query_result = embeddings.embed_query(text)
 print(f"SAGEMAKER EMBEDDINGS")
 print(query_result[:5])

-embeddings = OpenAIEmbeddings(model="bedrock-embeddings", openai_api_base="http://0.0.0.0:8000", openai_api_key="temp-key")
+embeddings = OpenAIEmbeddings(model="bedrock-embeddings", openai_api_base="http://0.0.0.0:4000", openai_api_key="temp-key")

 text = "This is a test document."


@@ -242,7 +242,7 @@ query_result = embeddings.embed_query(text)
 print(f"BEDROCK EMBEDDINGS")
 print(query_result[:5])

-embeddings = OpenAIEmbeddings(model="bedrock-titan-embeddings", openai_api_base="http://0.0.0.0:8000", openai_api_key="temp-key")
+embeddings = OpenAIEmbeddings(model="bedrock-titan-embeddings", openai_api_base="http://0.0.0.0:4000", openai_api_key="temp-key")

 text = "This is a test document."

@@ -296,7 +296,7 @@ from openai import OpenAI

 # set base_url to your proxy server
 # set api_key to send to proxy server
-client = OpenAI(api_key="<proxy-api-key>", base_url="http://0.0.0.0:8000")
+client = OpenAI(api_key="<proxy-api-key>", base_url="http://0.0.0.0:4000")

 response = client.moderations.create(
 input="hello from litellm",

@@ -310,7 +310,7 @@ print(response)
 <TabItem value="Curl" label="Curl Request">

 ```shell
-curl --location 'http://0.0.0.0:8000/moderations' \
+curl --location 'http://0.0.0.0:4000/moderations' \
 --header 'Content-Type: application/json' \
 --header 'Authorization: Bearer sk-1234' \
 --data '{"input": "Sample text goes here", "model": "text-moderation-stable"}'

@@ -421,7 +421,7 @@ user_config = {
 import openai
 client = openai.OpenAI(
 api_key="sk-1234",
-base_url="http://0.0.0.0:8000"
+base_url="http://0.0.0.0:4000"
 )

 # send request to `user-azure-instance`

@@ -489,7 +489,7 @@ const { OpenAI } = require('openai');

 const openai = new OpenAI({
 apiKey: "sk-1234",
-baseURL: "http://0.0.0.0:8000"
+baseURL: "http://0.0.0.0:4000"
 });

 async function main() {

@@ -516,7 +516,7 @@ Here's how to do it:
 import openai
 client = openai.OpenAI(
 api_key="sk-1234",
-base_url="http://0.0.0.0:8000"
+base_url="http://0.0.0.0:4000"
 )

 # request sent to model set on litellm proxy, `litellm --model`

@@ -541,7 +541,7 @@ Pass in the litellm_params (E.g. api_key, api_base, etc.) via the `extra_body` p
 import openai
 client = openai.OpenAI(
 api_key="sk-1234",
-base_url="http://0.0.0.0:8000"
+base_url="http://0.0.0.0:4000"
 )

 # request sent to model set on litellm proxy, `litellm --model`

@@ -571,7 +571,7 @@ const { OpenAI } = require('openai');

 const openai = new OpenAI({
 apiKey: "sk-1234",
-baseURL: "http://0.0.0.0:8000"
+baseURL: "http://0.0.0.0:4000"
 });

 async function main() {
@@ -44,7 +44,7 @@ litellm /path/to/config.yaml
 **Step 3. Send test call**

 ```bash
-curl --location 'http://0.0.0.0:8000/chat/completions' \
+curl --location 'http://0.0.0.0:4000/chat/completions' \
 --header 'Autherization: Bearer sk-1234' \
 --header 'Content-Type: application/json' \
 --data '{
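The test call above uses curl. The same call can be made with the OpenAI Python SDK once a virtual key exists; this is a hedged sketch assuming a proxy at port 4000, with the key value and model name as placeholders:

```python
# Minimal sketch: calling the proxy with a generated virtual key
import openai

client = openai.OpenAI(
    api_key="sk-1234",               # a key returned by /key/generate
    base_url="http://0.0.0.0:4000",
)

response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hello"}],
)
print(response.choices[0].message.content)
```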
@@ -72,7 +72,7 @@ By default the `max_budget` is set to `null` and is not checked for keys

 #### **Add budgets to users**
 ```shell
-curl --location 'http://localhost:8000/user/new' \
+curl --location 'http://localhost:4000/user/new' \
 --header 'Authorization: Bearer <your-master-key>' \
 --header 'Content-Type: application/json' \
 --data-raw '{"models": ["azure-models"], "max_budget": 0, "user_id": "krrish3@berri.ai"}'

@@ -96,7 +96,7 @@ curl --location 'http://localhost:8000/user/new' \
 `budget_duration`: Budget is reset at the end of specified duration. If not set, budget is never reset. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d").

 ```
-curl 'http://0.0.0.0:8000/user/new' \
+curl 'http://0.0.0.0:4000/user/new' \
 --header 'Authorization: Bearer <your-master-key>' \
 --header 'Content-Type: application/json' \
 --data-raw '{
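The `budget_duration` behaviour described in the hunk above is easier to see with a full request body. A hedged sketch using `requests`, assuming a proxy at port 4000; the master key, user id, and dollar amount are placeholders:

```python
# Minimal sketch: create a user whose budget resets every 30 days
import requests

resp = requests.post(
    "http://0.0.0.0:4000/user/new",
    headers={
        "Authorization": "Bearer <your-master-key>",  # proxy master key
        "Content-Type": "application/json",
    },
    json={
        "user_id": "krrish3@berri.ai",
        "max_budget": 10,          # USD
        "budget_duration": "30d",  # reset the spend counter every 30 days
    },
)
print(resp.json())
```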
@@ -113,7 +113,7 @@ Now you can just call `/key/generate` with that user_id (i.e. krrish3@berri.ai)
 - **Spend Tracking**: spend for this key will update krrish3@berri.ai's spend as well

 ```bash
-curl --location 'http://0.0.0.0:8000/key/generate' \
+curl --location 'http://0.0.0.0:4000/key/generate' \
 --header 'Authorization: Bearer <your-master-key>' \
 --header 'Content-Type: application/json' \
 --data '{"models": ["azure-models"], "user_id": "krrish3@berri.ai"}'

@@ -127,7 +127,7 @@ You can:

 #### **Add budgets to users**
 ```shell
-curl --location 'http://localhost:8000/team/new' \
+curl --location 'http://localhost:4000/team/new' \
 --header 'Authorization: Bearer <your-master-key>' \
 --header 'Content-Type: application/json' \
 --data-raw '{

@@ -238,7 +238,7 @@ By default the `max_budget` is set to `null` and is not checked for keys
 #### **Add budgets to keys**

 ```bash
-curl 'http://0.0.0.0:8000/key/generate' \
+curl 'http://0.0.0.0:4000/key/generate' \
 --header 'Authorization: Bearer <your-master-key>' \
 --header 'Content-Type: application/json' \
 --data-raw '{

@@ -250,7 +250,7 @@ curl 'http://0.0.0.0:8000/key/generate' \
 Example Request to `/chat/completions` when key has crossed budget

 ```shell
-curl --location 'http://0.0.0.0:8000/chat/completions' \
+curl --location 'http://0.0.0.0:4000/chat/completions' \
 --header 'Content-Type: application/json' \
 --header 'Authorization: Bearer <generated-key>' \
 --data ' {

@@ -278,7 +278,7 @@ Expected Response from `/chat/completions` when key has crossed budget
 `budget_duration`: Budget is reset at the end of specified duration. If not set, budget is never reset. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d").

 ```
-curl 'http://0.0.0.0:8000/key/generate' \
+curl 'http://0.0.0.0:4000/key/generate' \
 --header 'Authorization: Bearer <your-master-key>' \
 --header 'Content-Type: application/json' \
 --data-raw '{

@@ -310,7 +310,7 @@ By default the `model_max_budget` is set to `{}` and is not checked for keys
 #### **Add model specific budgets to keys**

 ```bash
-curl 'http://0.0.0.0:8000/key/generate' \
+curl 'http://0.0.0.0:4000/key/generate' \
 --header 'Authorization: Bearer <your-master-key>' \
 --header 'Content-Type: application/json' \
 --data-raw '{
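The `model_max_budget` hunk above truncates the request body. A hedged sketch of one plausible payload, assuming a proxy at port 4000; the exact shape of `model_max_budget` (a mapping of model name to a USD cap) is an assumption, as are the model names and amounts:

```python
# Hedged sketch: generate a key with per-model budgets via /key/generate
import requests

resp = requests.post(
    "http://0.0.0.0:4000/key/generate",
    headers={
        "Authorization": "Bearer <your-master-key>",
        "Content-Type": "application/json",
    },
    json={
        # assumed shape: model name -> USD cap
        "model_max_budget": {"gpt-4": 0.5, "gpt-3.5-turbo": 5.0},
    },
)
print(resp.json())
```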
@@ -335,7 +335,7 @@ Use `/user/new`, to persist rate limits across multiple keys.


 ```shell
-curl --location 'http://0.0.0.0:8000/user/new' \
+curl --location 'http://0.0.0.0:4000/user/new' \
 --header 'Authorization: Bearer sk-1234' \
 --header 'Content-Type: application/json' \
 --data '{"user_id": "krrish@berri.ai", "max_parallel_requests": 10, "tpm_limit": 20, "rpm_limit": 4}'
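The same `/user/new` rate-limit call, written in Python for readers not using curl; it mirrors the payload in the hunk above and assumes a proxy at port 4000:

```python
# Minimal sketch: set per-user rate limits via /user/new
import requests

resp = requests.post(
    "http://0.0.0.0:4000/user/new",
    headers={"Authorization": "Bearer sk-1234", "Content-Type": "application/json"},
    json={
        "user_id": "krrish@berri.ai",
        "max_parallel_requests": 10,  # concurrent requests allowed
        "tpm_limit": 20,              # tokens per minute
        "rpm_limit": 4,               # requests per minute
    },
)
print(resp.json())
```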
@@ -359,7 +359,7 @@ curl --location 'http://0.0.0.0:8000/user/new' \
 Use `/key/generate`, if you want them for just that key.

 ```shell
-curl --location 'http://0.0.0.0:8000/key/generate' \
+curl --location 'http://0.0.0.0:4000/key/generate' \
 --header 'Authorization: Bearer sk-1234' \
 --header 'Content-Type: application/json' \
 --data '{"max_parallel_requests": 10, "tpm_limit": 20, "rpm_limit": 4}'

@@ -401,7 +401,7 @@ model_list:
 **Step 2. Create key with access group**

 ```bash
-curl --location 'http://localhost:8000/user/new' \
+curl --location 'http://localhost:4000/user/new' \
 -H 'Authorization: Bearer <your-master-key>' \
 -H 'Content-Type: application/json' \
 -d '{"models": ["beta-models"], # 👈 Model Access Group
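A Python equivalent of the access-group call above, hedged: it assumes a proxy at port 4000 and reuses the `beta-models` group name from the surrounding config example:

```python
# Minimal sketch: grant a user access to the "beta-models" access group
import requests

resp = requests.post(
    "http://localhost:4000/user/new",
    headers={"Authorization": "Bearer <your-master-key>", "Content-Type": "application/json"},
    json={"models": ["beta-models"]},  # "beta-models" is the Model Access Group
)
print(resp.json())
```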
@@ -414,7 +414,7 @@ curl --location 'http://localhost:8000/user/new' \
 Just include user_id in the `/key/generate` request.

 ```bash
-curl --location 'http://0.0.0.0:8000/key/generate' \
+curl --location 'http://0.0.0.0:4000/key/generate' \
 --header 'Authorization: Bearer <your-master-key>' \
 --header 'Content-Type: application/json' \
 --data '{"models": ["azure-models"], "user_id": "krrish@berri.ai"}'

@@ -59,7 +59,7 @@ litellm --config /path/to/config.yaml
 **Step 3: Generate temporary keys**

 ```shell
-curl 'http://0.0.0.0:8000/key/generate' \
+curl 'http://0.0.0.0:4000/key/generate' \
 --header 'Authorization: Bearer <your-master-key>' \
 --header 'Content-Type: application/json' \
 --data-raw '{"models": ["gpt-3.5-turbo", "gpt-4", "claude-2"], "duration": "20m","metadata": {"user": "ishaan@berri.ai"}}'

@@ -70,7 +70,7 @@ curl 'http://0.0.0.0:8000/key/generate' \

 ### Request
 ```shell
-curl 'http://0.0.0.0:8000/key/generate' \
+curl 'http://0.0.0.0:4000/key/generate' \
 --header 'Authorization: Bearer <your-master-key>' \
 --header 'Content-Type: application/json' \
 --data-raw '{

@@ -105,7 +105,7 @@ Request Params:
 ```python
 {
 "key": "sk-kdEXbIqZRwEeEiHwdg7sFA", # Bearer token
-"expires": "2023-11-19T01:38:25.838000+00:00" # datetime object
+"expires": "2023-11-19T01:38:25.834000+00:00" # datetime object
 "key_name": "sk-...7sFA" # abbreviated key string, ONLY stored in db if `allow_user_auth: true` set - [see](./ui.md)
 ...
 }
@@ -147,7 +147,7 @@ model_list:
 **Step 2: Generate a user key - enabling them access to specific models, custom model aliases, etc.**

 ```bash
-curl -X POST "https://0.0.0.0:8000/key/generate" \
+curl -X POST "https://0.0.0.0:4000/key/generate" \
 -H "Authorization: Bearer <your-master-key>" \
 -H "Content-Type: application/json" \
 -d '{

@@ -182,7 +182,7 @@ model_list:
 **Step 2. Create key with access group**

 ```bash
-curl --location 'http://localhost:8000/key/generate' \
+curl --location 'http://localhost:4000/key/generate' \
 -H 'Authorization: Bearer <your-master-key>' \
 -H 'Content-Type: application/json' \
 -d '{"models": ["beta-models"], # 👈 Model Access Group

@@ -194,7 +194,7 @@ curl --location 'http://localhost:8000/key/generate' \

 ### Request
 ```shell
-curl -X GET "http://0.0.0.0:8000/key/info?key=sk-02Wr4IAlN3NvPXvL5JVvDA" \
+curl -X GET "http://0.0.0.0:4000/key/info?key=sk-02Wr4IAlN3NvPXvL5JVvDA" \
 -H "Authorization: Bearer sk-1234"
 ```

@@ -228,7 +228,7 @@ Request Params:

 ### Request
 ```shell
-curl 'http://0.0.0.0:8000/key/update' \
+curl 'http://0.0.0.0:4000/key/update' \
 --header 'Authorization: Bearer <your-master-key>' \
 --header 'Content-Type: application/json' \
 --data-raw '{

@@ -266,7 +266,7 @@ Request Params:

 ### Request
 ```shell
-curl 'http://0.0.0.0:8000/key/delete' \
+curl 'http://0.0.0.0:4000/key/delete' \
 --header 'Authorization: Bearer <your-master-key>' \
 --header 'Content-Type: application/json' \
 --data-raw '{

@@ -500,7 +500,7 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
 Set `max_budget` in (USD $) param in the `key/generate` request. By default the `max_budget` is set to `null` and is not checked for keys

 ```shell
-curl 'http://0.0.0.0:8000/key/generate' \
+curl 'http://0.0.0.0:4000/key/generate' \
 --header 'Authorization: Bearer <your-master-key>' \
 --header 'Content-Type: application/json' \
 --data-raw '{

@@ -517,7 +517,7 @@ curl 'http://0.0.0.0:8000/key/generate' \
 Example Request to `/chat/completions` when key has crossed budget

 ```shell
-curl --location 'http://0.0.0.0:8000/chat/completions' \
+curl --location 'http://0.0.0.0:4000/chat/completions' \
 --header 'Content-Type: application/json' \
 --header 'Authorization: Bearer sk-ULl_IKCVFy2EZRzQB16RUA' \
 --data ' {
@@ -545,10 +545,10 @@ Expected Response from `/chat/completions` when key has crossed budget

 LiteLLM exposes a `/user/new` endpoint to create budgets for users, that persist across multiple keys.

-This is documented in the swagger (live on your server root endpoint - e.g. `http://0.0.0.0:8000/`). Here's an example request.
+This is documented in the swagger (live on your server root endpoint - e.g. `http://0.0.0.0:4000/`). Here's an example request.

 ```shell
-curl --location 'http://localhost:8000/user/new' \
+curl --location 'http://localhost:4000/user/new' \
 --header 'Authorization: Bearer <your-master-key>' \
 --header 'Content-Type: application/json' \
 --data-raw '{"models": ["azure-models"], "max_budget": 0, "user_id": "krrish3@berri.ai"}'

@@ -571,7 +571,7 @@ The request is a normal `/key/generate` request body + a `max_budget` field.
 You can get spend for a key by using the `/key/info` endpoint.

 ```bash
-curl 'http://0.0.0.0:8000/key/info?key=<user-key>' \
+curl 'http://0.0.0.0:4000/key/info?key=<user-key>' \
 -X GET \
 -H 'Authorization: Bearer <your-master-key>'
 ```
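For readers tracking spend programmatically, the `/key/info` call above translates directly to Python. A hedged sketch assuming a proxy at port 4000; the key placeholders match the curl example, and the response shape is not asserted beyond it being JSON:

```python
# Minimal sketch: read a key's tracked spend via /key/info
import requests

resp = requests.get(
    "http://0.0.0.0:4000/key/info",
    params={"key": "<user-key>"},                        # the generated key to inspect
    headers={"Authorization": "Bearer <your-master-key>"},
)
print(resp.json())  # spend is reported inside the returned key info
```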
@@ -771,7 +771,7 @@ general_settings:
 #### Step 3. Generate Key

 ```bash
-curl --location 'http://0.0.0.0:8000/key/generate' \
+curl --location 'http://0.0.0.0:4000/key/generate' \
 --header 'Authorization: Bearer sk-1234' \
 --header 'Content-Type: application/json' \
 --data '{"models": ["azure-models"], "aliases": {"mistral-7b": "gpt-3.5-turbo"}, "duration": null}'
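Once the key with the `mistral-7b` alias above exists, a client can call the proxy using that alias name. A hedged sketch, assuming a proxy at port 4000 and a key returned by the `/key/generate` call:

```python
# Minimal sketch: call the proxy using the "mistral-7b" alias created above
import openai

client = openai.OpenAI(api_key="<generated-key>", base_url="http://0.0.0.0:4000")

response = client.chat.completions.create(
    model="mistral-7b",  # alias resolved by the proxy per the key's aliases mapping
    messages=[{"role": "user", "content": "hello"}],
)
print(response.choices[0].message.content)
```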
@@ -22,7 +22,7 @@ $ pip install 'litellm[proxy]'
 ```shell
 $ litellm --model huggingface/bigcode/starcoder

-#INFO: Proxy running on http://0.0.0.0:8000
+#INFO: Proxy running on http://0.0.0.0:4000
 ```

 ### Test

@@ -39,7 +39,7 @@ This will now automatically route any requests for gpt-3.5-turbo to bigcode starcoder
 <TabItem value="Curl" label="Curl Request">

 ```shell
-curl --location 'http://0.0.0.0:8000/chat/completions' \
+curl --location 'http://0.0.0.0:4000/chat/completions' \
 --header 'Content-Type: application/json' \
 --data ' {
 "model": "gpt-3.5-turbo",

@@ -59,7 +59,7 @@ curl --location 'http://0.0.0.0:8000/chat/completions' \
 import openai
 client = openai.OpenAI(
 api_key="anything",
-base_url="http://0.0.0.0:8000"
+base_url="http://0.0.0.0:4000"
 )

 # request sent to model set on litellm proxy, `litellm --model`

@@ -246,7 +246,7 @@ Set `base_url` to the LiteLLM Proxy server
 import openai
 client = openai.OpenAI(
 api_key="anything",
-base_url="http://0.0.0.0:8000"
+base_url="http://0.0.0.0:4000"
 )

 # request sent to model set on litellm proxy, `litellm --model`

@@ -267,7 +267,7 @@ print(response)
 ```shell
 litellm --model gpt-3.5-turbo

-#INFO: Proxy running on http://0.0.0.0:8000
+#INFO: Proxy running on http://0.0.0.0:4000
 ```

 #### 1. Clone the repo
@@ -278,9 +278,9 @@ git clone https://github.com/danny-avila/LibreChat.git


 #### 2. Modify Librechat's `docker-compose.yml`
-LiteLLM Proxy is running on port `8000`, set `8000` as the proxy below
+LiteLLM Proxy is running on port `4000`, set `4000` as the proxy below
 ```yaml
-OPENAI_REVERSE_PROXY=http://host.docker.internal:8000/v1/chat/completions
+OPENAI_REVERSE_PROXY=http://host.docker.internal:4000/v1/chat/completions
 ```

 #### 3. Save fake OpenAI key in Librechat's `.env`

@@ -306,7 +306,7 @@ In the [config.py](https://continue.dev/docs/reference/Models/openai) set this as
 api_key="IGNORED",
 model="fake-model-name",
 context_length=2048, # customize if needed for your model
-api_base="http://localhost:8000" # your proxy server url
+api_base="http://localhost:4000" # your proxy server url
 ),
 ```


@@ -318,7 +318,7 @@ Credits [@vividfog](https://github.com/jmorganca/ollama/issues/305#issuecomment-
 ```shell
 $ pip install aider

-$ aider --openai-api-base http://0.0.0.0:8000 --openai-api-key fake-key
+$ aider --openai-api-base http://0.0.0.0:4000 --openai-api-key fake-key
 ```
 </TabItem>
 <TabItem value="autogen" label="AutoGen">

@@ -332,7 +332,7 @@ from autogen import AssistantAgent, UserProxyAgent, oai
 config_list=[
 {
 "model": "my-fake-model",
-"api_base": "http://localhost:8000", #litellm compatible endpoint
+"api_base": "http://localhost:4000", #litellm compatible endpoint
 "api_type": "open_ai",
 "api_key": "NULL", # just a placeholder
 }
@@ -370,7 +370,7 @@ import guidance

 # set api_base to your proxy
 # set api_key to anything
-gpt4 = guidance.llms.OpenAI("gpt-4", api_base="http://0.0.0.0:8000", api_key="anything")
+gpt4 = guidance.llms.OpenAI("gpt-4", api_base="http://0.0.0.0:4000", api_key="anything")

 experts = guidance('''
 {{#system~}}

@@ -479,7 +479,7 @@ $ litellm --config /path/to/config.yaml
 #### Step 3: Use proxy
 Curl Command
 ```shell
-curl --location 'http://0.0.0.0:8000/chat/completions' \
+curl --location 'http://0.0.0.0:4000/chat/completions' \
 --header 'Content-Type: application/json' \
 --data ' {
 "model": "zephyr-alpha",

@@ -529,7 +529,7 @@ $ litellm --config /path/to/config.yaml
 #### Step 3: Use proxy
 Curl Command
 ```shell
-curl --location 'http://0.0.0.0:8000/chat/completions' \
+curl --location 'http://0.0.0.0:4000/chat/completions' \
 --header 'Content-Type: application/json' \
 --data ' {
 "model": "gpt-3.5-turbo",

@@ -586,7 +586,7 @@ litellm_settings:
 **Set dynamically**

 ```bash
-curl --location 'http://0.0.0.0:8000/chat/completions' \
+curl --location 'http://0.0.0.0:4000/chat/completions' \
 --header 'Content-Type: application/json' \
 --data ' {
 "model": "zephyr-beta",

@@ -615,7 +615,7 @@ model_list:
 - model_name: custom_embedding_model
 litellm_params:
 model: openai/custom_embedding # the `openai/` prefix tells litellm it's openai compatible
-api_base: http://0.0.0.0:8000/
+api_base: http://0.0.0.0:4000/
 - model_name: custom_embedding_model
 litellm_params:
 model: openai/custom_embedding # the `openai/` prefix tells litellm it's openai compatible
@@ -665,7 +665,7 @@ litellm --config /path/to/config.yaml
 **Step 3: Generate temporary keys**

 ```shell
-curl 'http://0.0.0.0:8000/key/generate' \
+curl 'http://0.0.0.0:4000/key/generate' \
 --h 'Authorization: Bearer sk-1234' \
 --d '{"models": ["gpt-3.5-turbo", "gpt-4", "claude-2"], "duration": "20m"}'
 ```

@@ -719,7 +719,7 @@ model_list:
 **Step 2: Generate a user key - enabling them access to specific models, custom model aliases, etc.**

 ```bash
-curl -X POST "https://0.0.0.0:8000/key/generate" \
+curl -X POST "https://0.0.0.0:4000/key/generate" \
 -H "Authorization: Bearer sk-1234" \
 -H "Content-Type: application/json" \
 -d '{

@@ -737,7 +737,7 @@ curl -X POST "https://0.0.0.0:8000/key/generate" \
 You can get spend for a key by using the `/key/info` endpoint.

 ```bash
-curl 'http://0.0.0.0:8000/key/info?key=<user-key>' \
+curl 'http://0.0.0.0:4000/key/info?key=<user-key>' \
 -X GET \
 -H 'Authorization: Bearer <your-master-key>'
 ```

@@ -868,7 +868,7 @@ $ litellm --config /path/to/config.yaml
 #### Using Caching
 Send the same request twice:
 ```shell
-curl http://0.0.0.0:8000/v1/chat/completions \
+curl http://0.0.0.0:4000/v1/chat/completions \
 -H "Content-Type: application/json" \
 -d '{
 "model": "gpt-3.5-turbo",
@@ -876,7 +876,7 @@ curl http://0.0.0.0:8000/v1/chat/completions \
 "temperature": 0.7
 }'

-curl http://0.0.0.0:8000/v1/chat/completions \
+curl http://0.0.0.0:4000/v1/chat/completions \
 -H "Content-Type: application/json" \
 -d '{
 "model": "gpt-3.5-turbo",

@@ -889,7 +889,7 @@ Caching can be switched on/off per `/chat/completions` request
 Caching can be switched on/off per `/chat/completions` request
 - Caching **on** for completion - pass `caching=True`:
 ```shell
-curl http://0.0.0.0:8000/v1/chat/completions \
+curl http://0.0.0.0:4000/v1/chat/completions \
 -H "Content-Type: application/json" \
 -d '{
 "model": "gpt-3.5-turbo",

@@ -900,7 +900,7 @@ Caching can be switched on/off per `/chat/completions` request
 ```
 - Caching **off** for completion - pass `caching=False`:
 ```shell
-curl http://0.0.0.0:8000/v1/chat/completions \
+curl http://0.0.0.0:4000/v1/chat/completions \
 -H "Content-Type: application/json" \
 -d '{
 "model": "gpt-3.5-turbo",
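The per-request cache toggle in the hunks above is shown only via curl. A hedged Python sketch of the same idea: the `caching` field comes from the docs above, but routing it through the OpenAI SDK's `extra_body` (and the proxy address on port 4000) is an assumption:

```python
# Hedged sketch: toggling the per-request cache flag from Python
import openai

client = openai.OpenAI(api_key="anything", base_url="http://0.0.0.0:4000")

cached = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "write a poem about litellm"}],
    extra_body={"caching": True},   # opt this call into the proxy cache
)
uncached = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "write a poem about litellm"}],
    extra_body={"caching": False},  # bypass the cache for this call
)
```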
@@ -963,10 +963,10 @@ https://api.openai.com/v1/chat/completions \
 Use this to health check all LLMs defined in your config.yaml
 #### Request
 ```shell
-curl --location 'http://0.0.0.0:8000/health'
+curl --location 'http://0.0.0.0:4000/health'
 ```

-You can also run `litellm -health` it makes a `get` request to `http://0.0.0.0:8000/health` for you
+You can also run `litellm -health` it makes a `get` request to `http://0.0.0.0:4000/health` for you
 ```
 litellm --health
 ```
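The health check above can also be scripted. A minimal sketch, assuming the proxy is reachable at port 4000:

```python
# Minimal sketch: hit the /health endpoint from Python
import requests

resp = requests.get("http://0.0.0.0:4000/health")
resp.raise_for_status()
print(resp.json())  # health status for the models defined in config.yaml
```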
@@ -1087,7 +1087,7 @@ litellm -config config.yaml

 #### Run a test request to Proxy
 ```shell
-curl --location 'http://0.0.0.0:8000/chat/completions' \
+curl --location 'http://0.0.0.0:4000/chat/completions' \
 --header 'Authorization: Bearer sk-1244' \
 --data ' {
 "model": "gpt-3.5-turbo",

@@ -1213,7 +1213,7 @@ LiteLLM proxy adds **0.00325 seconds** latency as compared to using the Raw OpenAI API
 ```

 #### --port
-- **Default:** `8000`
+- **Default:** `4000`
 - The port to bind the server to.
 - **Usage:**
 ```shell

@@ -61,7 +61,7 @@ def is_port_in_use(port):
 @click.option(
 "--host", default="0.0.0.0", help="Host for the server to listen on.", envvar="HOST"
 )
-@click.option("--port", default=8000, help="Port to bind the server to.", envvar="PORT")
+@click.option("--port", default=4000, help="Port to bind the server to.", envvar="PORT")
 @click.option(
 "--num_workers",
 default=default_num_workers,
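The hunk header above references `is_port_in_use`, which matters because the default-port change interacts with it (see the later hunk where the port is re-randomized if already in use). The repository's own implementation is not shown in this diff; the following is only a generic sketch of such a check, under the assumption that a plain TCP connect test is sufficient:

```python
# Hedged sketch only: one common way a helper like is_port_in_use() can be written.
# This is NOT copied from litellm's proxy_cli.py; it just illustrates the check.
import socket

def is_port_in_use(port: int) -> bool:
    """Return True if something is already listening on localhost:<port>."""
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        return s.connect_ex(("localhost", port)) == 0

if __name__ == "__main__":
    print(is_port_in_use(4000))
```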
@@ -273,7 +273,7 @@ def run_server(
 ],
 }

-response = requests.post("http://0.0.0.0:8000/queue/request", json=data)
+response = requests.post("http://0.0.0.0:4000/queue/request", json=data)

 response = response.json()


@@ -507,7 +507,7 @@ def run_server(
 print(
 f"Unable to connect to DB. DATABASE_URL found in environment, but prisma package not found."
 )
-if port == 8000 and is_port_in_use(port):
+if port == 4000 and is_port_in_use(port):
 port = random.randint(1024, 49152)

 from litellm.proxy.proxy_server import app

@@ -246,7 +246,7 @@ class Router:
 "122999-2828282-277:
 {
 "model": "gpt-3",
-"api_base": "http://localhost:8000",
+"api_base": "http://localhost:4000",
 "num_requests": 20,
 "avg_latency": 0.001,
 "num_failures": 0,