12 KiB
import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem';
Use with Langchain, OpenAI SDK, LlamaIndex, Curl
:::info
Input, Output, Exceptions are mapped to the OpenAI format for all supported models
:::
How to send requests to the proxy, pass metadata, allow users to pass in their OpenAI API key
/chat/completions
Request Format
Set extra_body={"metadata": { }}
to metadata
you want to pass
import openai
client = openai.OpenAI(
api_key="anything",
base_url="http://0.0.0.0:8000"
)
# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(
model="gpt-3.5-turbo",
messages = [
{
"role": "user",
"content": "this is a test request, write a short poem"
}
],
extra_body={
"metadata": {
"generation_name": "ishaan-generation-openai-client",
"generation_id": "openai-client-gen-id22",
"trace_id": "openai-client-trace-id22",
"trace_user_id": "openai-client-user-id2"
}
}
)
print(response)
import os, dotenv
from llama_index.llms import AzureOpenAI
from llama_index.embeddings import AzureOpenAIEmbedding
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
llm = AzureOpenAI(
engine="azure-gpt-3.5", # model_name on litellm proxy
temperature=0.0,
azure_endpoint="http://0.0.0.0:4000", # litellm proxy endpoint
api_key="sk-1234", # litellm proxy API Key
api_version="2023-07-01-preview",
)
embed_model = AzureOpenAIEmbedding(
deployment_name="azure-embedding-model",
azure_endpoint="http://0.0.0.0:4000",
api_key="sk-1234",
api_version="2023-07-01-preview",
)
documents = SimpleDirectoryReader("llama_index_data").load_data()
service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)
index = VectorStoreIndex.from_documents(documents, service_context=service_context)
query_engine = index.as_query_engine()
response = query_engine.query("What did the author do growing up?")
print(response)
Pass metadata
as part of the request body
curl --location 'http://0.0.0.0:8000/chat/completions' \
--header 'Content-Type: application/json' \
--data '{
"model": "gpt-3.5-turbo",
"messages": [
{
"role": "user",
"content": "what llm are you"
}
],
"metadata": {
"generation_name": "ishaan-test-generation",
"generation_id": "gen-id22",
"trace_id": "trace-id22",
"trace_user_id": "user-id2"
}
}'
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
ChatPromptTemplate,
HumanMessagePromptTemplate,
SystemMessagePromptTemplate,
)
from langchain.schema import HumanMessage, SystemMessage
chat = ChatOpenAI(
openai_api_base="http://0.0.0.0:8000",
model = "gpt-3.5-turbo",
temperature=0.1,
extra_body={
"metadata": {
"generation_name": "ishaan-generation-langchain-client",
"generation_id": "langchain-client-gen-id22",
"trace_id": "langchain-client-trace-id22",
"trace_user_id": "langchain-client-user-id2"
}
}
)
messages = [
SystemMessage(
content="You are a helpful assistant that im using to make a test request to."
),
HumanMessage(
content="test from litellm. tell me why it's amazing in 1 sentence"
),
]
response = chat(messages)
print(response)
Response Format
{
"id": "chatcmpl-8c5qbGTILZa1S4CK3b31yj5N40hFN",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"message": {
"content": "As an AI language model, I do not have a physical form or personal preferences. However, I am programmed to assist with various topics and provide information on a wide range of subjects. Is there something specific you would like assistance with?",
"role": "assistant"
}
}
],
"created": 1704089632,
"model": "gpt-35-turbo",
"object": "chat.completion",
"system_fingerprint": null,
"usage": {
"completion_tokens": 47,
"prompt_tokens": 12,
"total_tokens": 59
},
"_response_ms": 1753.426
}
/embeddings
Request Format
Input, Output and Exceptions are mapped to the OpenAI format for all supported models
import openai
from openai import OpenAI
# set base_url to your proxy server
# set api_key to send to proxy server
client = OpenAI(api_key="<proxy-api-key>", base_url="http://0.0.0.0:8000")
response = openai.embeddings.create(
input=["hello from litellm"],
model="text-embedding-ada-002"
)
print(response)
curl --location 'http://0.0.0.0:8000/embeddings' \
--header 'Content-Type: application/json' \
--data ' {
"model": "text-embedding-ada-002",
"input": ["write a litellm poem"]
}'
from langchain.embeddings import OpenAIEmbeddings
embeddings = OpenAIEmbeddings(model="sagemaker-embeddings", openai_api_base="http://0.0.0.0:8000", openai_api_key="temp-key")
text = "This is a test document."
query_result = embeddings.embed_query(text)
print(f"SAGEMAKER EMBEDDINGS")
print(query_result[:5])
embeddings = OpenAIEmbeddings(model="bedrock-embeddings", openai_api_base="http://0.0.0.0:8000", openai_api_key="temp-key")
text = "This is a test document."
query_result = embeddings.embed_query(text)
print(f"BEDROCK EMBEDDINGS")
print(query_result[:5])
embeddings = OpenAIEmbeddings(model="bedrock-titan-embeddings", openai_api_base="http://0.0.0.0:8000", openai_api_key="temp-key")
text = "This is a test document."
query_result = embeddings.embed_query(text)
print(f"TITAN EMBEDDINGS")
print(query_result[:5])
Response Format
{
"object": "list",
"data": [
{
"object": "embedding",
"embedding": [
0.0023064255,
-0.009327292,
....
-0.0028842222,
],
"index": 0
}
],
"model": "text-embedding-ada-002",
"usage": {
"prompt_tokens": 8,
"total_tokens": 8
}
}
Advanced
Pass User LLM API Keys, Fallbacks
Allows users to pass their model list, api base, OpenAI API key (any LiteLLM supported provider) to make requests
:::info
**You can pass a litellm.RouterConfig as user_config
, See all supported params here https://github.com/BerriAI/litellm/blob/main/litellm/types/router.py **
:::
Step 1: Define user model list & config
import os
user_config = {
'model_list': [
{
'model_name': 'user-azure-instance',
'litellm_params': {
'model': 'azure/chatgpt-v-2',
'api_key': os.getenv('AZURE_API_KEY'),
'api_version': os.getenv('AZURE_API_VERSION'),
'api_base': os.getenv('AZURE_API_BASE'),
'timeout': 10,
},
'tpm': 240000,
'rpm': 1800,
},
{
'model_name': 'user-openai-instance',
'litellm_params': {
'model': 'gpt-3.5-turbo',
'api_key': os.getenv('OPENAI_API_KEY'),
'timeout': 10,
},
'tpm': 240000,
'rpm': 1800,
},
],
'num_retries': 2,
'allowed_fails': 3,
'fallbacks': [
{
'user-azure-instance': ['user-openai-instance']
}
]
}
Step 2: Send user_config in extra_body
import openai
client = openai.OpenAI(
api_key="sk-1234",
base_url="http://0.0.0.0:8000"
)
# send request to `user-azure-instance`
response = client.chat.completions.create(model="user-azure-instance", messages = [
{
"role": "user",
"content": "this is a test request, write a short poem"
}
],
extra_body={
"user_config": user_config
}
) # 👈 User config
print(response)
Step 1: Define user model list & config
const os = require('os');
const userConfig = {
model_list: [
{
model_name: 'user-azure-instance',
litellm_params: {
model: 'azure/chatgpt-v-2',
api_key: process.env.AZURE_API_KEY,
api_version: process.env.AZURE_API_VERSION,
api_base: process.env.AZURE_API_BASE,
timeout: 10,
},
tpm: 240000,
rpm: 1800,
},
{
model_name: 'user-openai-instance',
litellm_params: {
model: 'gpt-3.5-turbo',
api_key: process.env.OPENAI_API_KEY,
timeout: 10,
},
tpm: 240000,
rpm: 1800,
},
],
num_retries: 2,
allowed_fails: 3,
fallbacks: [
{
'user-azure-instance': ['user-openai-instance']
}
]
};
Step 2: Send user_config
as a param to openai.chat.completions.create
const { OpenAI } = require('openai');
const openai = new OpenAI({
apiKey: "sk-1234",
baseURL: "http://0.0.0.0:8000"
});
async function main() {
const chatCompletion = await openai.chat.completions.create({
messages: [{ role: 'user', content: 'Say this is a test' }],
model: 'gpt-3.5-turbo',
user_config: userConfig // # 👈 User config
});
}
main();
Pass User LLM API Keys
Allows your users to pass in their OpenAI API key (any LiteLLM supported provider) to make requests
Here's how to do it:
import openai
client = openai.OpenAI(
api_key="sk-1234",
base_url="http://0.0.0.0:8000"
)
# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(model="gpt-3.5-turbo", messages = [
{
"role": "user",
"content": "this is a test request, write a short poem"
}
],
extra_body={"api_key": "my-bad-key"}) # 👈 User Key
print(response)
More examples:
Pass in the litellm_params (E.g. api_key, api_base, etc.) via the extra_body
parameter in the OpenAI client.
import openai
client = openai.OpenAI(
api_key="sk-1234",
base_url="http://0.0.0.0:8000"
)
# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(model="gpt-3.5-turbo", messages = [
{
"role": "user",
"content": "this is a test request, write a short poem"
}
],
extra_body={
"api_key": "my-azure-key",
"api_base": "my-azure-base",
"api_version": "my-azure-version"
}) # 👈 User Key
print(response)
For JS, the OpenAI client accepts passing params in the create(..)
body as normal.
const { OpenAI } = require('openai');
const openai = new OpenAI({
apiKey: "sk-1234",
baseURL: "http://0.0.0.0:8000"
});
async function main() {
const chatCompletion = await openai.chat.completions.create({
messages: [{ role: 'user', content: 'Say this is a test' }],
model: 'gpt-3.5-turbo',
api_key: "my-bad-key" // 👈 User Key
});
}
main();