import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

# Use with Langchain, OpenAI SDK, LlamaIndex, Curl

:::info

**Input, Output, Exceptions are mapped to the OpenAI format for all supported models**

:::

This page covers how to send requests to the proxy, pass metadata with requests, and allow users to pass in their own OpenAI API key.
## `/chat/completions`

### Request Format

<Tabs>

<TabItem value="openai" label="OpenAI Python v1.0.0+">

Set `extra_body={"metadata": {...}}` to the `metadata` you want to pass with the request.
```python
import openai
client = openai.OpenAI(
    api_key="anything",
    base_url="http://0.0.0.0:8000"
)

# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[
        {
            "role": "user",
            "content": "this is a test request, write a short poem"
        }
    ],
    extra_body={
        "metadata": {
            "generation_name": "ishaan-generation-openai-client",
            "generation_id": "openai-client-gen-id22",
            "trace_id": "openai-client-trace-id22",
            "trace_user_id": "openai-client-user-id2"
        }
    }
)

print(response)
```
</TabItem>
<TabItem value="LlamaIndex" label="LlamaIndex">
|
|
|
|
```python
|
|
import os, dotenv
|
|
|
|
from llama_index.llms import AzureOpenAI
|
|
from llama_index.embeddings import AzureOpenAIEmbedding
|
|
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
|
|
|
|
llm = AzureOpenAI(
|
|
engine="azure-gpt-3.5", # model_name on litellm proxy
|
|
temperature=0.0,
|
|
azure_endpoint="http://0.0.0.0:4000", # litellm proxy endpoint
|
|
api_key="sk-1234", # litellm proxy API Key
|
|
api_version="2023-07-01-preview",
|
|
)
|
|
|
|
embed_model = AzureOpenAIEmbedding(
|
|
deployment_name="azure-embedding-model",
|
|
azure_endpoint="http://0.0.0.0:4000",
|
|
api_key="sk-1234",
|
|
api_version="2023-07-01-preview",
|
|
)
|
|
|
|
|
|
documents = SimpleDirectoryReader("llama_index_data").load_data()
|
|
service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)
|
|
index = VectorStoreIndex.from_documents(documents, service_context=service_context)
|
|
|
|
query_engine = index.as_query_engine()
|
|
response = query_engine.query("What did the author do growing up?")
|
|
print(response)
|
|
|
|
```
|
|
</TabItem>
|
|
|
|
<TabItem value="Curl" label="Curl Request">
|
|
|
|
Pass `metadata` as part of the request body
|
|
|
|
```shell
|
|
curl --location 'http://0.0.0.0:8000/chat/completions' \
|
|
--header 'Content-Type: application/json' \
|
|
--data '{
|
|
"model": "gpt-3.5-turbo",
|
|
"messages": [
|
|
{
|
|
"role": "user",
|
|
"content": "what llm are you"
|
|
}
|
|
],
|
|
"metadata": {
|
|
"generation_name": "ishaan-test-generation",
|
|
"generation_id": "gen-id22",
|
|
"trace_id": "trace-id22",
|
|
"trace_user_id": "user-id2"
|
|
}
|
|
}'
|
|
```
|
|
</TabItem>
|
|
<TabItem value="langchain" label="Langchain">
|
|
|
|
```python
|
|
from langchain.chat_models import ChatOpenAI
|
|
from langchain.prompts.chat import (
|
|
ChatPromptTemplate,
|
|
HumanMessagePromptTemplate,
|
|
SystemMessagePromptTemplate,
|
|
)
|
|
from langchain.schema import HumanMessage, SystemMessage
|
|
|
|
chat = ChatOpenAI(
|
|
openai_api_base="http://0.0.0.0:8000",
|
|
model = "gpt-3.5-turbo",
|
|
temperature=0.1,
|
|
extra_body={
|
|
"metadata": {
|
|
"generation_name": "ishaan-generation-langchain-client",
|
|
"generation_id": "langchain-client-gen-id22",
|
|
"trace_id": "langchain-client-trace-id22",
|
|
"trace_user_id": "langchain-client-user-id2"
|
|
}
|
|
}
|
|
)
|
|
|
|
messages = [
|
|
SystemMessage(
|
|
content="You are a helpful assistant that im using to make a test request to."
|
|
),
|
|
HumanMessage(
|
|
content="test from litellm. tell me why it's amazing in 1 sentence"
|
|
),
|
|
]
|
|
response = chat(messages)
|
|
|
|
print(response)
|
|
```
|
|
|
|
</TabItem>
|
|
</Tabs>
|
|
|
|
### Response Format

```json
{
  "id": "chatcmpl-8c5qbGTILZa1S4CK3b31yj5N40hFN",
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "message": {
        "content": "As an AI language model, I do not have a physical form or personal preferences. However, I am programmed to assist with various topics and provide information on a wide range of subjects. Is there something specific you would like assistance with?",
        "role": "assistant"
      }
    }
  ],
  "created": 1704089632,
  "model": "gpt-35-turbo",
  "object": "chat.completion",
  "system_fingerprint": null,
  "usage": {
    "completion_tokens": 47,
    "prompt_tokens": 12,
    "total_tokens": 59
  },
  "_response_ms": 1753.426
}
```
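Since the proxy returns the standard OpenAI schema, the response can be read with the regular OpenAI SDK objects. A minimal sketch, assuming the same proxy `client` as the Request Format tab above; the attribute accesses mirror the JSON fields shown here:

```python
import openai

client = openai.OpenAI(api_key="anything", base_url="http://0.0.0.0:8000")

response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "this is a test request, write a short poem"}],
)

# fields map 1:1 to the JSON above
print(response.choices[0].message.content)          # assistant reply text
print(response.model, response.usage.total_tokens)  # model + token usage
```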
## `/embeddings`

### Request Format

Input, Output and Exceptions are mapped to the OpenAI format for all supported models

<Tabs>
<TabItem value="openai" label="OpenAI Python v1.0.0+">
|
|
|
|
```python
|
|
import openai
|
|
from openai import OpenAI
|
|
|
|
# set base_url to your proxy server
|
|
# set api_key to send to proxy server
|
|
client = OpenAI(api_key="<proxy-api-key>", base_url="http://0.0.0.0:8000")
|
|
|
|
response = openai.embeddings.create(
|
|
input=["hello from litellm"],
|
|
model="text-embedding-ada-002"
|
|
)
|
|
|
|
print(response)
|
|
|
|
```
|
|
</TabItem>
|
|
<TabItem value="Curl" label="Curl Request">
|
|
|
|
```shell
|
|
curl --location 'http://0.0.0.0:8000/embeddings' \
|
|
--header 'Content-Type: application/json' \
|
|
--data ' {
|
|
"model": "text-embedding-ada-002",
|
|
"input": ["write a litellm poem"]
|
|
}'
|
|
```
|
|
</TabItem>
|
|
|
|
<TabItem value="langchain-embedding" label="Langchain Embeddings">
|
|
|
|
```python
|
|
from langchain.embeddings import OpenAIEmbeddings
|
|
|
|
embeddings = OpenAIEmbeddings(model="sagemaker-embeddings", openai_api_base="http://0.0.0.0:8000", openai_api_key="temp-key")
|
|
|
|
|
|
text = "This is a test document."
|
|
|
|
query_result = embeddings.embed_query(text)
|
|
|
|
print(f"SAGEMAKER EMBEDDINGS")
|
|
print(query_result[:5])
|
|
|
|
embeddings = OpenAIEmbeddings(model="bedrock-embeddings", openai_api_base="http://0.0.0.0:8000", openai_api_key="temp-key")
|
|
|
|
text = "This is a test document."
|
|
|
|
query_result = embeddings.embed_query(text)
|
|
|
|
print(f"BEDROCK EMBEDDINGS")
|
|
print(query_result[:5])
|
|
|
|
embeddings = OpenAIEmbeddings(model="bedrock-titan-embeddings", openai_api_base="http://0.0.0.0:8000", openai_api_key="temp-key")
|
|
|
|
text = "This is a test document."
|
|
|
|
query_result = embeddings.embed_query(text)
|
|
|
|
print(f"TITAN EMBEDDINGS")
|
|
print(query_result[:5])
|
|
```
|
|
</TabItem>
|
|
</Tabs>
|
|
|
|
|
|
### Response Format

```json
{
  "object": "list",
  "data": [
    {
      "object": "embedding",
      "embedding": [
        0.0023064255,
        -0.009327292,
        ....
        -0.0028842222,
      ],
      "index": 0
    }
  ],
  "model": "text-embedding-ada-002",
  "usage": {
    "prompt_tokens": 8,
    "total_tokens": 8
  }
}
```
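Embedding responses follow the OpenAI schema as well, so the vector can be pulled out with the standard SDK objects. A minimal sketch, assuming the same proxy settings as the OpenAI Python tab above:

```python
from openai import OpenAI

client = OpenAI(api_key="<proxy-api-key>", base_url="http://0.0.0.0:8000")

response = client.embeddings.create(
    input=["hello from litellm"],
    model="text-embedding-ada-002",
)

vector = response.data[0].embedding   # list of floats, matches "data[0].embedding" above
print(len(vector), vector[:5])
print(response.usage.prompt_tokens, response.usage.total_tokens)
```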
## Advanced

### Pass User LLM API Keys, Fallbacks

Allows users to pass in their own model list, API base, and API key (for any LiteLLM-supported provider) with their requests.

:::info

**You can pass a `litellm.RouterConfig` as `user_config`. See all supported params here: https://github.com/BerriAI/litellm/blob/main/litellm/types/router.py**

:::

<Tabs>

<TabItem value="openai-py" label="OpenAI Python">

#### Step 1: Define user model list & config
```python
import os

user_config = {
    'model_list': [
        {
            'model_name': 'user-azure-instance',
            'litellm_params': {
                'model': 'azure/chatgpt-v-2',
                'api_key': os.getenv('AZURE_API_KEY'),
                'api_version': os.getenv('AZURE_API_VERSION'),
                'api_base': os.getenv('AZURE_API_BASE'),
                'timeout': 10,
            },
            'tpm': 240000,
            'rpm': 1800,
        },
        {
            'model_name': 'user-openai-instance',
            'litellm_params': {
                'model': 'gpt-3.5-turbo',
                'api_key': os.getenv('OPENAI_API_KEY'),
                'timeout': 10,
            },
            'tpm': 240000,
            'rpm': 1800,
        },
    ],
    'num_retries': 2,
    'allowed_fails': 3,
    'fallbacks': [
        {
            'user-azure-instance': ['user-openai-instance']
        }
    ]
}
```

#### Step 2: Send user_config in `extra_body`
```python
import openai
client = openai.OpenAI(
    api_key="sk-1234",
    base_url="http://0.0.0.0:8000"
)

# send request to `user-azure-instance`
response = client.chat.completions.create(
    model="user-azure-instance",
    messages=[
        {
            "role": "user",
            "content": "this is a test request, write a short poem"
        }
    ],
    extra_body={
        "user_config": user_config  # 👈 User config
    }
)

print(response)
```

</TabItem>
<TabItem value="openai-js" label="OpenAI JS">
|
|
|
|
#### Step 1: Define user model list & config
|
|
```javascript
const userConfig = {
    model_list: [
        {
            model_name: 'user-azure-instance',
            litellm_params: {
                model: 'azure/chatgpt-v-2',
                api_key: process.env.AZURE_API_KEY,
                api_version: process.env.AZURE_API_VERSION,
                api_base: process.env.AZURE_API_BASE,
                timeout: 10,
            },
            tpm: 240000,
            rpm: 1800,
        },
        {
            model_name: 'user-openai-instance',
            litellm_params: {
                model: 'gpt-3.5-turbo',
                api_key: process.env.OPENAI_API_KEY,
                timeout: 10,
            },
            tpm: 240000,
            rpm: 1800,
        },
    ],
    num_retries: 2,
    allowed_fails: 3,
    fallbacks: [
        {
            'user-azure-instance': ['user-openai-instance']
        }
    ]
};
```

#### Step 2: Send `user_config` as a param to `openai.chat.completions.create`
```javascript
const { OpenAI } = require('openai');

const openai = new OpenAI({
    apiKey: "sk-1234",
    baseURL: "http://0.0.0.0:8000"
});

async function main() {
    const chatCompletion = await openai.chat.completions.create({
        messages: [{ role: 'user', content: 'Say this is a test' }],
        model: 'gpt-3.5-turbo',
        user_config: userConfig // 👈 User config
    });
}

main();
```

</TabItem>

</Tabs>
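The OpenAI SDK simply merges `extra_body` fields into the JSON request body, so the same `user_config` can be sent with a plain HTTP call. A minimal sketch using `requests`, assuming the proxy reads `user_config` from the request body (the URL, key, and `user_config` dict are the example values from Step 1 above):

```python
import requests

# assumes `user_config` is the dict defined in Step 1 above
resp = requests.post(
    "http://0.0.0.0:8000/chat/completions",
    headers={"Authorization": "Bearer sk-1234"},  # proxy key from the examples above
    json={
        "model": "user-azure-instance",
        "messages": [{"role": "user", "content": "this is a test request, write a short poem"}],
        "user_config": user_config,  # 👈 same field the SDK sends via extra_body
    },
)
print(resp.json())
```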
### Pass User LLM API Keys

Allows your users to pass in their own OpenAI API key (or a key for any LiteLLM-supported provider) to make requests.

Here's how to do it:
```python
import openai
client = openai.OpenAI(
    api_key="sk-1234",
    base_url="http://0.0.0.0:8000"
)

# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[
        {
            "role": "user",
            "content": "this is a test request, write a short poem"
        }
    ],
    extra_body={"api_key": "my-bad-key"}  # 👈 User Key
)

print(response)
```
More examples:

<Tabs>

<TabItem value="openai-py" label="Azure Credentials">

Pass in the litellm_params (e.g. api_key, api_base, etc.) via the `extra_body` parameter in the OpenAI client.
```python
import openai
client = openai.OpenAI(
    api_key="sk-1234",
    base_url="http://0.0.0.0:8000"
)

# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[
        {
            "role": "user",
            "content": "this is a test request, write a short poem"
        }
    ],
    extra_body={
        "api_key": "my-azure-key",
        "api_base": "my-azure-base",
        "api_version": "my-azure-version"
    }  # 👈 User Key
)

print(response)
```

</TabItem>
<TabItem value="openai-js" label="OpenAI JS">
|
|
|
|
For JS, the OpenAI client accepts passing params in the `create(..)` body as normal.
|
|
|
|
```javascript
const { OpenAI } = require('openai');

const openai = new OpenAI({
    apiKey: "sk-1234",
    baseURL: "http://0.0.0.0:8000"
});

async function main() {
    const chatCompletion = await openai.chat.completions.create({
        messages: [{ role: 'user', content: 'Say this is a test' }],
        model: 'gpt-3.5-turbo',
        api_key: "my-bad-key" // 👈 User Key
    });
}

main();
```
</TabItem>
</Tabs>