forked from phoenix/litellm-mirror
(docs) proxy
This commit is contained in:
parent
a98d752f7b
commit
20236c1c69
4 changed files with 147 additions and 146 deletions
|
@ -3,7 +3,7 @@ import Tabs from '@theme/Tabs';
|
||||||
import TabItem from '@theme/TabItem';
|
import TabItem from '@theme/TabItem';
|
||||||
|
|
||||||
|
|
||||||
# Logging - Custom Callbacks, OpenTelemetry, Langfuse, Sentry
|
# Logging - Custom Callbacks, Langfuse, OpenTelemetry, Sentry
|
||||||
|
|
||||||
Log Proxy Input, Output, Exceptions using Custom Callbacks, Langfuse, OpenTelemetry, DynamoDB
|
Log Proxy Input, Output, Exceptions using Custom Callbacks, Langfuse, OpenTelemetry, DynamoDB
|
||||||
|
|
||||||
|
@ -290,6 +290,145 @@ ModelResponse(
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
## Logging Proxy Input/Output - Langfuse
|
||||||
|
We will use the `--config` to set `litellm.success_callback = ["langfuse"]`; this will log all successful LLM calls to Langfuse
|
||||||
|
|
||||||
|
**Step 1**: Install langfuse
|
||||||
|
|
||||||
|
```shell
|
||||||
|
pip install langfuse>=2.0.0
|
||||||
|
```
|
||||||
|
|
||||||
|
**Step 2**: Create a `config.yaml` file and set `litellm_settings`: `success_callback`
|
||||||
|
```yaml
|
||||||
|
model_list:
|
||||||
|
- model_name: gpt-3.5-turbo
|
||||||
|
litellm_params:
|
||||||
|
model: gpt-3.5-turbo
|
||||||
|
litellm_settings:
|
||||||
|
success_callback: ["langfuse"]
|
||||||
|
```
|
||||||
|
|
||||||
|
**Step 3**: Start the proxy, make a test request
|
||||||
|
|
||||||
|
Start proxy
|
||||||
|
```shell
|
||||||
|
litellm --config config.yaml --debug
|
||||||
|
```
|
||||||
|
|
||||||
|
Test Request
|
||||||
|
```
|
||||||
|
litellm --test
|
||||||
|
```
|
||||||
|
|
||||||
|
Expected output on Langfuse
|
||||||
|
|
||||||
|
<Image img={require('../../img/langfuse_small.png')} />
|
||||||
|
|
||||||
|
### Logging Metadata to Langfuse
|
||||||
|
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
|
||||||
|
<TabItem value="Curl" label="Curl Request">
|
||||||
|
|
||||||
|
Pass `metadata` as part of the request body
|
||||||
|
|
||||||
|
```shell
|
||||||
|
curl --location 'http://0.0.0.0:8000/chat/completions' \
|
||||||
|
--header 'Content-Type: application/json' \
|
||||||
|
--data '{
|
||||||
|
"model": "gpt-3.5-turbo",
|
||||||
|
"messages": [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "what llm are you"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"generation_name": "ishaan-test-generation",
|
||||||
|
"generation_id": "gen-id22",
|
||||||
|
"trace_id": "trace-id22",
|
||||||
|
"trace_user_id": "user-id2"
|
||||||
|
}
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="openai" label="OpenAI v1.0.0+">
|
||||||
|
|
||||||
|
Set `extra_body={"metadata": { }}` to `metadata` you want to pass
|
||||||
|
|
||||||
|
```python
|
||||||
|
import openai
|
||||||
|
client = openai.OpenAI(
|
||||||
|
api_key="anything",
|
||||||
|
base_url="http://0.0.0.0:8000"
|
||||||
|
)
|
||||||
|
|
||||||
|
# request sent to model set on litellm proxy, `litellm --model`
|
||||||
|
response = client.chat.completions.create(
|
||||||
|
model="gpt-3.5-turbo",
|
||||||
|
messages = [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "this is a test request, write a short poem"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
extra_body={
|
||||||
|
"metadata": {
|
||||||
|
"generation_name": "ishaan-generation-openai-client",
|
||||||
|
"generation_id": "openai-client-gen-id22",
|
||||||
|
"trace_id": "openai-client-trace-id22",
|
||||||
|
"trace_user_id": "openai-client-user-id2"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
print(response)
|
||||||
|
```
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="langchain" label="Langchain">
|
||||||
|
|
||||||
|
```python
|
||||||
|
from langchain.chat_models import ChatOpenAI
|
||||||
|
from langchain.prompts.chat import (
|
||||||
|
ChatPromptTemplate,
|
||||||
|
HumanMessagePromptTemplate,
|
||||||
|
SystemMessagePromptTemplate,
|
||||||
|
)
|
||||||
|
from langchain.schema import HumanMessage, SystemMessage
|
||||||
|
|
||||||
|
chat = ChatOpenAI(
|
||||||
|
openai_api_base="http://0.0.0.0:8000",
|
||||||
|
model = "gpt-3.5-turbo",
|
||||||
|
temperature=0.1,
|
||||||
|
extra_body={
|
||||||
|
"metadata": {
|
||||||
|
"generation_name": "ishaan-generation-langchain-client",
|
||||||
|
"generation_id": "langchain-client-gen-id22",
|
||||||
|
"trace_id": "langchain-client-trace-id22",
|
||||||
|
"trace_user_id": "langchain-client-user-id2"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
messages = [
|
||||||
|
SystemMessage(
|
||||||
|
content="You are a helpful assistant that im using to make a test request to."
|
||||||
|
),
|
||||||
|
HumanMessage(
|
||||||
|
content="test from litellm. tell me why it's amazing in 1 sentence"
|
||||||
|
),
|
||||||
|
]
|
||||||
|
response = chat(messages)
|
||||||
|
|
||||||
|
print(response)
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
|
||||||
## OpenTelemetry - Traceloop
|
## OpenTelemetry - Traceloop
|
||||||
|
|
||||||
Traceloop allows you to log LLM Input/Output in the OpenTelemetry format
|
Traceloop allows you to log LLM Input/Output in the OpenTelemetry format
|
||||||
|
@ -458,144 +597,6 @@ Here's the log view on Elastic Search. You can see the request `input`, `output`
|
||||||
|
|
||||||
<Image img={require('../../img/elastic_otel.png')} /> -->
|
<Image img={require('../../img/elastic_otel.png')} /> -->
|
||||||
|
|
||||||
## Logging Proxy Input/Output - Langfuse
|
|
||||||
We will use the `--config` to set `litellm.success_callback = ["langfuse"]`; this will log all successful LLM calls to Langfuse
|
|
||||||
|
|
||||||
**Step 1**: Install langfuse
|
|
||||||
|
|
||||||
```shell
|
|
||||||
pip install langfuse>=2.0.0
|
|
||||||
```
|
|
||||||
|
|
||||||
**Step 2**: Create a `config.yaml` file and set `litellm_settings`: `success_callback`
|
|
||||||
```yaml
|
|
||||||
model_list:
|
|
||||||
- model_name: gpt-3.5-turbo
|
|
||||||
litellm_params:
|
|
||||||
model: gpt-3.5-turbo
|
|
||||||
litellm_settings:
|
|
||||||
success_callback: ["langfuse"]
|
|
||||||
```
|
|
||||||
|
|
||||||
**Step 3**: Start the proxy, make a test request
|
|
||||||
|
|
||||||
Start proxy
|
|
||||||
```shell
|
|
||||||
litellm --config config.yaml --debug
|
|
||||||
```
|
|
||||||
|
|
||||||
Test Request
|
|
||||||
```
|
|
||||||
litellm --test
|
|
||||||
```
|
|
||||||
|
|
||||||
Expected output on Langfuse
|
|
||||||
|
|
||||||
<Image img={require('../../img/langfuse_small.png')} />
|
|
||||||
|
|
||||||
### Logging Metadata to Langfuse
|
|
||||||
|
|
||||||
|
|
||||||
<Tabs>
|
|
||||||
|
|
||||||
<TabItem value="Curl" label="Curl Request">
|
|
||||||
|
|
||||||
Pass `metadata` as part of the request body
|
|
||||||
|
|
||||||
```shell
|
|
||||||
curl --location 'http://0.0.0.0:8000/chat/completions' \
|
|
||||||
--header 'Content-Type: application/json' \
|
|
||||||
--data '{
|
|
||||||
"model": "gpt-3.5-turbo",
|
|
||||||
"messages": [
|
|
||||||
{
|
|
||||||
"role": "user",
|
|
||||||
"content": "what llm are you"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"generation_name": "ishaan-test-generation",
|
|
||||||
"generation_id": "gen-id22",
|
|
||||||
"trace_id": "trace-id22",
|
|
||||||
"trace_user_id": "user-id2"
|
|
||||||
}
|
|
||||||
}'
|
|
||||||
```
|
|
||||||
</TabItem>
|
|
||||||
<TabItem value="openai" label="OpenAI v1.0.0+">
|
|
||||||
|
|
||||||
Set `extra_body={"metadata": { }}` to `metadata` you want to pass
|
|
||||||
|
|
||||||
```python
|
|
||||||
import openai
|
|
||||||
client = openai.OpenAI(
|
|
||||||
api_key="anything",
|
|
||||||
base_url="http://0.0.0.0:8000"
|
|
||||||
)
|
|
||||||
|
|
||||||
# request sent to model set on litellm proxy, `litellm --model`
|
|
||||||
response = client.chat.completions.create(
|
|
||||||
model="gpt-3.5-turbo",
|
|
||||||
messages = [
|
|
||||||
{
|
|
||||||
"role": "user",
|
|
||||||
"content": "this is a test request, write a short poem"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
extra_body={
|
|
||||||
"metadata": {
|
|
||||||
"generation_name": "ishaan-generation-openai-client",
|
|
||||||
"generation_id": "openai-client-gen-id22",
|
|
||||||
"trace_id": "openai-client-trace-id22",
|
|
||||||
"trace_user_id": "openai-client-user-id2"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
print(response)
|
|
||||||
```
|
|
||||||
</TabItem>
|
|
||||||
<TabItem value="langchain" label="Langchain">
|
|
||||||
|
|
||||||
```python
|
|
||||||
from langchain.chat_models import ChatOpenAI
|
|
||||||
from langchain.prompts.chat import (
|
|
||||||
ChatPromptTemplate,
|
|
||||||
HumanMessagePromptTemplate,
|
|
||||||
SystemMessagePromptTemplate,
|
|
||||||
)
|
|
||||||
from langchain.schema import HumanMessage, SystemMessage
|
|
||||||
|
|
||||||
chat = ChatOpenAI(
|
|
||||||
openai_api_base="http://0.0.0.0:8000",
|
|
||||||
model = "gpt-3.5-turbo",
|
|
||||||
temperature=0.1,
|
|
||||||
extra_body={
|
|
||||||
"metadata": {
|
|
||||||
"generation_name": "ishaan-generation-langchain-client",
|
|
||||||
"generation_id": "langchain-client-gen-id22",
|
|
||||||
"trace_id": "langchain-client-trace-id22",
|
|
||||||
"trace_user_id": "langchain-client-user-id2"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
messages = [
|
|
||||||
SystemMessage(
|
|
||||||
content="You are a helpful assistant that im using to make a test request to."
|
|
||||||
),
|
|
||||||
HumanMessage(
|
|
||||||
content="test from litellm. tell me why it's amazing in 1 sentence"
|
|
||||||
),
|
|
||||||
]
|
|
||||||
response = chat(messages)
|
|
||||||
|
|
||||||
print(response)
|
|
||||||
```
|
|
||||||
|
|
||||||
</TabItem>
|
|
||||||
</Tabs>
|
|
||||||
|
|
||||||
## Logging Proxy Input/Output - DynamoDB
|
## Logging Proxy Input/Output - DynamoDB
|
||||||
|
|
||||||
We will use the `--config` to set
|
We will use the `--config` to set
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
# [Tutorial] Streaming token usage Logging
|
# Track Token Usage (Streaming)
|
||||||
|
|
||||||
### Step 1 - Create your custom `litellm` callback class
|
### Step 1 - Create your custom `litellm` callback class
|
||||||
We use `litellm.integrations.custom_logger` for this, **more details about litellm custom callbacks [here](https://docs.litellm.ai/docs/observability/custom_callback)**
|
We use `litellm.integrations.custom_logger` for this, **more details about litellm custom callbacks [here](https://docs.litellm.ai/docs/observability/custom_callback)**
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
# Set Budgets + Rate Limits per user
|
# 💰 Budgets, Rate Limits per user
|
||||||
|
|
||||||
Requirements:
|
Requirements:
|
||||||
|
|
||||||
|
@ -10,7 +10,7 @@ LiteLLM exposes a `/user/new` endpoint to create budgets for users, that persist
|
||||||
|
|
||||||
This is documented in the swagger (live on your server root endpoint - e.g. `http://0.0.0.0:8000/`). Here's an example request.
|
This is documented in the swagger (live on your server root endpoint - e.g. `http://0.0.0.0:8000/`). Here's an example request.
|
||||||
|
|
||||||
```curl
|
```shell
|
||||||
curl --location 'http://localhost:8000/user/new' \
|
curl --location 'http://localhost:8000/user/new' \
|
||||||
--header 'Authorization: Bearer <your-master-key>' \
|
--header 'Authorization: Bearer <your-master-key>' \
|
||||||
--header 'Content-Type: application/json' \
|
--header 'Content-Type: application/json' \
|
||||||
|
@ -20,7 +20,7 @@ The request is a normal `/key/generate` request body + a `max_budget` field.
|
||||||
|
|
||||||
**Sample Response**
|
**Sample Response**
|
||||||
|
|
||||||
```curl
|
```shell
|
||||||
{
|
{
|
||||||
"key": "sk-YF2OxDbrgd1y2KgwxmEA2w",
|
"key": "sk-YF2OxDbrgd1y2KgwxmEA2w",
|
||||||
"expires": "2023-12-22T09:53:13.861000Z",
|
"expires": "2023-12-22T09:53:13.861000Z",
|
||||||
|
@ -34,7 +34,7 @@ The request is a normal `/key/generate` request body + a `max_budget` field.
|
||||||
|
|
||||||
Set max parallel requests a user can make, when you create user keys - `/key/generate`.
|
Set max parallel requests a user can make, when you create user keys - `/key/generate`.
|
||||||
|
|
||||||
```bash
|
```shell
|
||||||
curl --location 'http://0.0.0.0:8000/key/generate' \
|
curl --location 'http://0.0.0.0:8000/key/generate' \
|
||||||
--header 'Authorization: Bearer sk-1234' \
|
--header 'Authorization: Bearer sk-1234' \
|
||||||
--header 'Content-Type: application/json' \
|
--header 'Content-Type: application/json' \
|
||||||
|
|
|
@ -111,10 +111,10 @@ const sidebars = {
|
||||||
"proxy/health",
|
"proxy/health",
|
||||||
"proxy/call_hooks",
|
"proxy/call_hooks",
|
||||||
"proxy/caching",
|
"proxy/caching",
|
||||||
"proxy/streaming_logging",
|
|
||||||
"proxy/logging",
|
"proxy/logging",
|
||||||
"proxy/cli",
|
"proxy/streaming_logging",
|
||||||
"proxy/deploy",
|
"proxy/deploy",
|
||||||
|
"proxy/cli",
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"routing",
|
"routing",
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue