import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
# Use with Langchain, OpenAI SDK, LlamaIndex, Curl
:::info
**Input, Output, Exceptions are mapped to the OpenAI format for all supported models**
:::
How to send requests to the proxy, pass metadata, and allow users to pass in their own OpenAI API keys.
## `/chat/completions`
### Request Format
<Tabs>
<TabItem value="openai" label="OpenAI Python v1.0.0+">
Set `extra_body={"metadata": { }}` to the `metadata` you want to pass
```python
import openai

client = openai.OpenAI(
    api_key="anything",
    base_url="http://0.0.0.0:4000"
)

# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[
        {
            "role": "user",
            "content": "this is a test request, write a short poem"
        }
    ],
    extra_body={ # pass in any provider-specific param, if not supported by openai, https://docs.litellm.ai/docs/completion/input#provider-specific-params
        "metadata": { # 👈 use for logging additional params (e.g. to langfuse)
            "generation_name": "ishaan-generation-openai-client",
            "generation_id": "openai-client-gen-id22",
            "trace_id": "openai-client-trace-id22",
            "trace_user_id": "openai-client-user-id2"
        }
    }
)

print(response)
```
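
Streaming works the same way through the proxy; a minimal sketch reusing the `client` above (`stream=True` is standard OpenAI SDK usage, which the proxy passes through):

```python
# sketch: stream tokens through the proxy, reusing the client from above
stream = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "this is a test request, write a short poem"}],
    stream=True,
)
for chunk in stream:
    # each chunk is an OpenAI-format delta; content can be None on the final chunk
    if chunk.choices and chunk.choices[0].delta.content is not None:
        print(chunk.choices[0].delta.content, end="")
```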
</TabItem>
<TabItem value="LlamaIndex" label="LlamaIndex">
```python
import os

from llama_index.llms import AzureOpenAI
from llama_index.embeddings import AzureOpenAIEmbedding
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext

llm = AzureOpenAI(
    engine="azure-gpt-3.5",               # model_name on litellm proxy
    temperature=0.0,
    azure_endpoint="http://0.0.0.0:4000", # litellm proxy endpoint
    api_key="sk-1234",                    # litellm proxy API Key
    api_version="2023-07-01-preview",
)

embed_model = AzureOpenAIEmbedding(
    deployment_name="azure-embedding-model",
    azure_endpoint="http://0.0.0.0:4000",
    api_key="sk-1234",
    api_version="2023-07-01-preview",
)

documents = SimpleDirectoryReader("llama_index_data").load_data()
service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)
index = VectorStoreIndex.from_documents(documents, service_context=service_context)

query_engine = index.as_query_engine()
response = query_engine.query("What did the author do growing up?")
print(response)
```
</TabItem>
<TabItem value="Curl" label="Curl Request">
Pass `metadata` as part of the request body
```shell
curl --location 'http://0.0.0.0:4000/chat/completions' \
    --header 'Content-Type: application/json' \
    --data '{
    "model": "gpt-3.5-turbo",
    "messages": [
        {
            "role": "user",
            "content": "what llm are you"
        }
    ],
    "metadata": {
        "generation_name": "ishaan-test-generation",
        "generation_id": "gen-id22",
        "trace_id": "trace-id22",
        "trace_user_id": "user-id2"
    }
}'
```
</TabItem>
<TabItem value="langchain" label="Langchain">
```python
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain.schema import HumanMessage, SystemMessage
import os

os.environ["OPENAI_API_KEY"] = "anything"

chat = ChatOpenAI(
    openai_api_base="http://0.0.0.0:4000",
    model="gpt-3.5-turbo",
    temperature=0.1,
    extra_body={
        "metadata": {
            "generation_name": "ishaan-generation-langchain-client",
            "generation_id": "langchain-client-gen-id22",
            "trace_id": "langchain-client-trace-id22",
            "trace_user_id": "langchain-client-user-id2"
        }
    }
)

messages = [
    SystemMessage(
        content="You are a helpful assistant that im using to make a test request to."
    ),
    HumanMessage(
        content="test from litellm. tell me why it's amazing in 1 sentence"
    ),
]
response = chat(messages)

print(response)
```
</TabItem>
</Tabs>
### Response Format
```json
{
  "id": "chatcmpl-8c5qbGTILZa1S4CK3b31yj5N40hFN",
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "message": {
        "content": "As an AI language model, I do not have a physical form or personal preferences. However, I am programmed to assist with various topics and provide information on a wide range of subjects. Is there something specific you would like assistance with?",
        "role": "assistant"
      }
    }
  ],
  "created": 1704089632,
  "model": "gpt-35-turbo",
  "object": "chat.completion",
  "system_fingerprint": null,
  "usage": {
    "completion_tokens": 47,
    "prompt_tokens": 12,
    "total_tokens": 59
  },
  "_response_ms": 1753.426
}
```
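
Because the response is OpenAI-format, the standard SDK accessors work against it; a short sketch reading the fields shown above:

```python
# sketch: reading the OpenAI-format response object returned by the proxy
print(response.choices[0].message.content)  # the assistant's reply
print(response.model)                       # model that served the request
print(response.usage.total_tokens)          # token usage for cost tracking
```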
## `/embeddings`
### Request Format
Input, Output and Exceptions are mapped to the OpenAI format for all supported models
<Tabs>
<TabItem value="openai" label="OpenAI Python v1.0.0+">
```python
from openai import OpenAI

# set base_url to your proxy server
# set api_key to send to proxy server
client = OpenAI(api_key="<proxy-api-key>", base_url="http://0.0.0.0:4000")

response = client.embeddings.create(
    input=["hello from litellm"],
    model="text-embedding-ada-002"
)

print(response)
```
</TabItem>
<TabItem value="Curl" label="Curl Request">
```shell
curl --location 'http://0.0.0.0:4000/embeddings' \
    --header 'Content-Type: application/json' \
    --data '{
    "model": "text-embedding-ada-002",
    "input": ["write a litellm poem"]
}'
```
</TabItem>
<TabItem value="langchain-embedding" label="Langchain Embeddings">
```python
from langchain.embeddings import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(model="sagemaker-embeddings", openai_api_base="http://0.0.0.0:4000", openai_api_key="temp-key")

text = "This is a test document."
query_result = embeddings.embed_query(text)
print("SAGEMAKER EMBEDDINGS")
print(query_result[:5])

embeddings = OpenAIEmbeddings(model="bedrock-embeddings", openai_api_base="http://0.0.0.0:4000", openai_api_key="temp-key")

text = "This is a test document."
query_result = embeddings.embed_query(text)
print("BEDROCK EMBEDDINGS")
print(query_result[:5])

embeddings = OpenAIEmbeddings(model="bedrock-titan-embeddings", openai_api_base="http://0.0.0.0:4000", openai_api_key="temp-key")

text = "This is a test document."
query_result = embeddings.embed_query(text)
print("TITAN EMBEDDINGS")
print(query_result[:5])
```
</TabItem>
</Tabs>
### Response Format
```json
{
  "object": "list",
  "data": [
    {
      "object": "embedding",
      "embedding": [
        0.0023064255,
        -0.009327292,
        ....
        -0.0028842222,
      ],
      "index": 0
    }
  ],
  "model": "text-embedding-ada-002",
  "usage": {
    "prompt_tokens": 8,
    "total_tokens": 8
  }
}
```
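
The returned vectors can be consumed directly; for example, a minimal cosine-similarity sketch, reusing the `client` from the OpenAI tab above:

```python
import math

def cosine_similarity(a: list, b: list) -> float:
    # cosine similarity = dot(a, b) / (|a| * |b|)
    dot = sum(x * y for x, y in zip(a, b))
    norm_a = math.sqrt(sum(x * x for x in a))
    norm_b = math.sqrt(sum(x * x for x in b))
    return dot / (norm_a * norm_b)

# embed two inputs via the proxy and compare them
resp = client.embeddings.create(
    input=["hello from litellm", "hello from the proxy"],
    model="text-embedding-ada-002"
)
print(cosine_similarity(resp.data[0].embedding, resp.data[1].embedding))
```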
## `/moderations`
### Request Format
Input, Output and Exceptions are mapped to the OpenAI format for all supported models
<Tabs>
<TabItem value="openai" label="OpenAI Python v1.0.0+">
```python
from openai import OpenAI

# set base_url to your proxy server
# set api_key to send to proxy server
client = OpenAI(api_key="<proxy-api-key>", base_url="http://0.0.0.0:4000")

response = client.moderations.create(
    input="hello from litellm",
    model="text-moderation-stable"
)

print(response)
```
</TabItem>
<TabItem value="Curl" label="Curl Request">
```shell
curl --location 'http://0.0.0.0:4000/moderations' \
    --header 'Content-Type: application/json' \
    --header 'Authorization: Bearer sk-1234' \
    --data '{"input": "Sample text goes here", "model": "text-moderation-stable"}'
```
</TabItem>
</Tabs>
### Response Format
```json
{
  "id": "modr-8sFEN22QCziALOfWTa77TodNLgHwA",
  "model": "text-moderation-007",
  "results": [
    {
      "categories": {
        "harassment": false,
        "harassment/threatening": false,
        "hate": false,
        "hate/threatening": false,
        "self-harm": false,
        "self-harm/instructions": false,
        "self-harm/intent": false,
        "sexual": false,
        "sexual/minors": false,
        "violence": false,
        "violence/graphic": false
      },
      "category_scores": {
        "harassment": 0.000019947197870351374,
        "harassment/threatening": 5.5971017900446896e-6,
        "hate": 0.000028560316422954202,
        "hate/threatening": 2.2631787999216613e-8,
        "self-harm": 2.9121162015144364e-7,
        "self-harm/instructions": 9.314219084899378e-8,
        "self-harm/intent": 8.093739012338119e-8,
        "sexual": 0.00004414955765241757,
        "sexual/minors": 0.0000156943697220413,
        "violence": 0.00022354527027346194,
        "violence/graphic": 8.804164281173144e-6
      },
      "flagged": false
    }
  ]
}
```
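
A common pattern is gating chat requests on the moderation verdict; a minimal sketch, reusing the `client` from the OpenAI tab above:

```python
# sketch: only forward input to /chat/completions if moderation passes
user_input = "Sample text goes here"
mod = client.moderations.create(input=user_input, model="text-moderation-stable")

if mod.results[0].flagged:
    print("Input flagged by moderation, request not sent")
else:
    chat = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": user_input}],
    )
    print(chat.choices[0].message.content)
```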
## Advanced
### (BETA) Batch Completions - pass multiple models
Use this when you want to send 1 request to N models.

#### Expected Request Format

Pass `model` as a comma-separated string of models, e.g. `"model": "llama3,gpt-3.5-turbo"`.

This same request will be sent to the following model groups on the [litellm proxy config.yaml](https://docs.litellm.ai/docs/proxy/configs) (a sketch of such a config follows this list):
- `model_name="llama3"`
- `model_name="gpt-3.5-turbo"`
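
A minimal sketch of what those two model groups could look like in the config.yaml (the provider routes and env vars below are placeholders, not the exact config used for the sample responses):

```yaml
model_list:
  - model_name: llama3
    litellm_params:
      model: groq/llama3-8b-8192          # placeholder provider route
      api_key: os.environ/GROQ_API_KEY
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: gpt-3.5-turbo
      api_key: os.environ/OPENAI_API_KEY
```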
<Tabs>
<TabItem value="openai-py" label="OpenAI Python SDK">
```python
import openai

client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

response = client.chat.completions.create(
    model="gpt-3.5-turbo,llama3",
    messages=[
        {"role": "user", "content": "this is a test request, write a short poem"}
    ],
)

print(response)
```
#### Expected Response Format
A list of responses is returned, one per model in the comma-separated `model` string:
```python
[
    ChatCompletion(
        id='chatcmpl-9NoYhS2G0fswot0b6QpoQgmRQMaIf',
        choices=[
            Choice(
                finish_reason='stop',
                index=0,
                logprobs=None,
                message=ChatCompletionMessage(
                    content='In the depths of my soul, a spark ignites\nA light that shines so pure and bright\nIt dances and leaps, refusing to die\nA flame of hope that reaches the sky\n\nIt warms my heart and fills me with bliss\nA reminder that in darkness, there is light to kiss\nSo I hold onto this fire, this guiding light\nAnd let it lead me through the darkest night.',
                    role='assistant',
                    function_call=None,
                    tool_calls=None
                )
            )
        ],
        created=1715462919,
        model='gpt-3.5-turbo-0125',
        object='chat.completion',
        system_fingerprint=None,
        usage=CompletionUsage(
            completion_tokens=83,
            prompt_tokens=17,
            total_tokens=100
        )
    ),
    ChatCompletion(
        id='chatcmpl-4ac3e982-da4e-486d-bddb-ed1d5cb9c03c',
        choices=[
            Choice(
                finish_reason='stop',
                index=0,
                logprobs=None,
                message=ChatCompletionMessage(
                    content="A test request, and I'm delighted!\nHere's a short poem, just for you:\n\nMoonbeams dance upon the sea,\nA path of light, for you to see.\nThe stars up high, a twinkling show,\nA night of wonder, for all to know.\n\nThe world is quiet, save the night,\nA peaceful hush, a gentle light.\nThe world is full, of beauty rare,\nA treasure trove, beyond compare.\n\nI hope you enjoyed this little test,\nA poem born, of whimsy and jest.\nLet me know, if there's anything else!",
                    role='assistant',
                    function_call=None,
                    tool_calls=None
                )
            )
        ],
        created=1715462919,
        model='groq/llama3-8b-8192',
        object='chat.completion',
        system_fingerprint='fp_a2c8d063cb',
        usage=CompletionUsage(
            completion_tokens=120,
            prompt_tokens=20,
            total_tokens=140
        )
    )
]
```
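
Since the return value is a plain Python list of `ChatCompletion` objects, handling one response per model is just iteration:

```python
# sketch: one ChatCompletion per model group in the request
for completion in response:
    print(completion.model, "->", completion.choices[0].message.content)
```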
</TabItem>
<TabItem value="curl" label="Curl">
```shell
curl --location 'http://localhost:4000/chat/completions' \
    --header 'Authorization: Bearer sk-1234' \
    --header 'Content-Type: application/json' \
    --data '{
    "model": "llama3,gpt-3.5-turbo",
    "max_tokens": 10,
    "user": "litellm2",
    "messages": [
        {
            "role": "user",
            "content": "is litellm getting better"
        }
    ]
}'
```
#### Expected Response Format
A list of responses is returned, one per model in the comma-separated `model` string:
```json
[
  {
    "id": "chatcmpl-3dbd5dd8-7c82-4ca3-bf1f-7c26f497cf2b",
    "choices": [
      {
        "finish_reason": "length",
        "index": 0,
        "message": {
          "content": "The Elder Scrolls IV: Oblivion!\n\nReleased",
          "role": "assistant"
        }
      }
    ],
    "created": 1715459876,
    "model": "groq/llama3-8b-8192",
    "object": "chat.completion",
    "system_fingerprint": "fp_179b0f92c9",
    "usage": {
      "completion_tokens": 10,
      "prompt_tokens": 12,
      "total_tokens": 22
    }
  },
  {
    "id": "chatcmpl-9NnldUfFLmVquFHSX4yAtjCw8PGei",
    "choices": [
      {
        "finish_reason": "length",
        "index": 0,
        "message": {
          "content": "TES4 could refer to The Elder Scrolls IV:",
          "role": "assistant"
        }
      }
    ],
    "created": 1715459877,
    "model": "gpt-3.5-turbo-0125",
    "object": "chat.completion",
    "system_fingerprint": null,
    "usage": {
      "completion_tokens": 10,
      "prompt_tokens": 9,
      "total_tokens": 19
    }
  }
]
```
</TabItem>
</Tabs>
### Pass User LLM API Keys, Fallbacks
Allow your end-users to pass in their own model list, API base, and API key (for any LiteLLM-supported provider) when making requests.

**Note** This is not related to [virtual keys](./virtual_keys.md). This is for when you want to pass in your users' actual LLM API keys.

:::info

**You can pass a litellm.RouterConfig as `user_config`. See all supported params here: https://github.com/BerriAI/litellm/blob/main/litellm/types/router.py**
:::
<Tabs>
<TabItem value="openai-py" label="OpenAI Python">
#### Step 1: Define user model list & config
```python
import os

user_config = {
    'model_list': [
        {
            'model_name': 'user-azure-instance',
            'litellm_params': {
                'model': 'azure/chatgpt-v-2',
                'api_key': os.getenv('AZURE_API_KEY'),
                'api_version': os.getenv('AZURE_API_VERSION'),
                'api_base': os.getenv('AZURE_API_BASE'),
                'timeout': 10,
            },
            'tpm': 240000,
            'rpm': 1800,
        },
        {
            'model_name': 'user-openai-instance',
            'litellm_params': {
                'model': 'gpt-3.5-turbo',
                'api_key': os.getenv('OPENAI_API_KEY'),
                'timeout': 10,
            },
            'tpm': 240000,
            'rpm': 1800,
        },
    ],
    'num_retries': 2,
    'allowed_fails': 3,
    'fallbacks': [
        {
            'user-azure-instance': ['user-openai-instance']
        }
    ]
}
```
#### Step 2: Send user_config in `extra_body`
```python
import openai

client = openai.OpenAI(
    api_key="sk-1234",
    base_url="http://0.0.0.0:4000"
)

# send request to `user-azure-instance`
response = client.chat.completions.create(
    model="user-azure-instance",
    messages=[
        {
            "role": "user",
            "content": "this is a test request, write a short poem"
        }
    ],
    extra_body={
        "user_config": user_config # 👈 User config
    }
)

print(response)
```
</TabItem>
<TabItem value="openai-js" label="OpenAI JS">
#### Step 1: Define user model list & config
```javascript
const userConfig = {
    model_list: [
        {
            model_name: 'user-azure-instance',
            litellm_params: {
                model: 'azure/chatgpt-v-2',
                api_key: process.env.AZURE_API_KEY,
                api_version: process.env.AZURE_API_VERSION,
                api_base: process.env.AZURE_API_BASE,
                timeout: 10,
            },
            tpm: 240000,
            rpm: 1800,
        },
        {
            model_name: 'user-openai-instance',
            litellm_params: {
                model: 'gpt-3.5-turbo',
                api_key: process.env.OPENAI_API_KEY,
                timeout: 10,
            },
            tpm: 240000,
            rpm: 1800,
        },
    ],
    num_retries: 2,
    allowed_fails: 3,
    fallbacks: [
        {
            'user-azure-instance': ['user-openai-instance']
        }
    ]
};
```
#### Step 2: Send `user_config` as a param to `openai.chat.completions.create`
```javascript
const { OpenAI } = require('openai');

const openai = new OpenAI({
    apiKey: "sk-1234",
    baseURL: "http://0.0.0.0:4000"
});

async function main() {
    const chatCompletion = await openai.chat.completions.create({
        messages: [{ role: 'user', content: 'Say this is a test' }],
        model: 'user-azure-instance', // a model_name from userConfig
        user_config: userConfig // 👈 User config
    });
}

main();
```
</TabItem>
</Tabs>
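
Because `extra_body` params are merged into the JSON request body by the SDK, `user_config` can also be sent directly over raw HTTP; a minimal curl sketch (model list trimmed, key left as a placeholder):

```shell
curl --location 'http://0.0.0.0:4000/chat/completions' \
    --header 'Authorization: Bearer sk-1234' \
    --header 'Content-Type: application/json' \
    --data '{
    "model": "user-openai-instance",
    "messages": [{"role": "user", "content": "this is a test request"}],
    "user_config": {
        "model_list": [
            {
                "model_name": "user-openai-instance",
                "litellm_params": {
                    "model": "gpt-3.5-turbo",
                    "api_key": "<your-openai-key>"
                }
            }
        ]
    }
}'
```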
### Pass User LLM API Keys
Allows your users to pass in their own OpenAI API key (for any LiteLLM-supported provider) when making requests.
Here's how to do it:
```python
import openai

client = openai.OpenAI(
    api_key="sk-1234",
    base_url="http://0.0.0.0:4000"
)

# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[
        {
            "role": "user",
            "content": "this is a test request, write a short poem"
        }
    ],
    extra_body={"api_key": "my-bad-key"} # 👈 User Key
)

print(response)
```
More examples:
<Tabs>
<TabItem value="openai-py" label="Azure Credentials">
Pass in the litellm_params (e.g. api_key, api_base, api_version) via the `extra_body` parameter in the OpenAI client.
```python
import openai

client = openai.OpenAI(
    api_key="sk-1234",
    base_url="http://0.0.0.0:4000"
)

# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[
        {
            "role": "user",
            "content": "this is a test request, write a short poem"
        }
    ],
    extra_body={ # 👈 User Key + Azure credentials
        "api_key": "my-azure-key",
        "api_base": "my-azure-base",
        "api_version": "my-azure-version"
    }
)

print(response)
```
</TabItem>
<TabItem value="openai-js" label="OpenAI JS">
For JS, the OpenAI client accepts passing params in the `create(..)` body as normal.
```javascript
const { OpenAI } = require('openai');

const openai = new OpenAI({
    apiKey: "sk-1234",
    baseURL: "http://0.0.0.0:4000"
});

async function main() {
    const chatCompletion = await openai.chat.completions.create({
        messages: [{ role: 'user', content: 'Say this is a test' }],
        model: 'gpt-3.5-turbo',
        api_key: "my-bad-key" // 👈 User Key
    });
}

main();
```
</TabItem>
</Tabs>
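
As with `user_config`, the `extra_body` params above land in the JSON request body, so a user key can also be passed over raw HTTP; a minimal curl sketch:

```shell
curl --location 'http://0.0.0.0:4000/chat/completions' \
    --header 'Authorization: Bearer sk-1234' \
    --header 'Content-Type: application/json' \
    --data '{
    "model": "gpt-3.5-turbo",
    "messages": [{"role": "user", "content": "this is a test request, write a short poem"}],
    "api_key": "my-bad-key"
}'
```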